# mypy: allow-untyped-decorators
# mypy: allow-untyped-defs
# Owner(s): ["module: tests"]

import torch
import torch.utils.data
import numpy as np

import contextlib
import gc
import io
import inspect
import itertools
import math
import random
import re
import copy
import os
import tempfile
import unittest
import warnings
import types
import pickle
import textwrap
import subprocess
import weakref
import sys
import copyreg
from torch import inf, nan
from itertools import product, combinations, permutations, chain
from functools import partial
from torch import multiprocessing as mp
from torch.testing import make_tensor
from torch.testing._internal.common_optimizers import (
    optim_db, optims, _get_optim_inputs_including_global_cliquey_kwargs)

from torch.testing._internal.common_utils import (  # type: ignore[attr-defined]
    TEST_WITH_TORCHINDUCTOR, TEST_WITH_ROCM, run_tests, IS_JETSON,
    IS_WINDOWS, IS_FILESYSTEM_UTF8_ENCODING, NO_MULTIPROCESSING_SPAWN,
    IS_SANDCASTLE, IS_FBCODE, IS_REMOTE_GPU, skipIfTorchInductor, load_tests, slowTest, slowTestIf,
    TEST_WITH_CROSSREF, skipIfTorchDynamo, skipRocmIfTorchInductor, set_default_dtype,
    skipCUDAMemoryLeakCheckIf, BytesIOContext,
    skipIfRocm, skipIfNoSciPy, TemporaryFileName, TemporaryDirectoryName,
    wrapDeterministicFlagAPITest, DeterministicGuard, CudaSyncGuard,
    bytes_to_scalar, parametrize, skipIfMps, noncontiguous_like,
    AlwaysWarnTypedStorageRemoval, TEST_WITH_TORCHDYNAMO, xfailIfTorchDynamo)
from multiprocessing.reduction import ForkingPickler
from torch.testing._internal.common_device_type import (
    expectedFailureMeta,
    expectedFailureXLA,
    instantiate_device_type_tests,
    onlyCUDA, onlyCPU,
    dtypes, dtypesIfCUDA, dtypesIfCPU, deviceCountAtLeast,
    skipMeta, PYTORCH_CUDA_MEMCHECK, largeTensorTest, onlyNativeDeviceTypes,
    get_all_device_types, skipXLA)
from typing import Tuple
import torch.backends.quantized
import torch.testing._internal.data
from torch.testing._internal.common_cuda import (
    tf32_on_and_off, tf32_is_not_fp32, TEST_CUDNN, TEST_MULTIGPU,
    _create_scaling_case, _create_scaling_models_optimizers)
from torch.testing._internal.common_mkldnn import bf32_on_and_off
from torch.testing._internal.common_dtype import (
    floating_types_and, get_all_math_dtypes, all_types_and_complex_and, complex_types,
    all_types_and, floating_types, floating_and_complex_types, integral_types_and,
    get_all_qint_dtypes,
)
from torch.testing._internal.two_tensor import TwoTensor

if TEST_WITH_TORCHINDUCTOR:
    from torch._inductor.test_case import TestCase
else:
    from torch.testing._internal.common_utils import TestCase  # type: ignore[assignment]


# Protects against includes accidentally setting the default dtype
assert torch.get_default_dtype() is torch.float32

# load_tests from torch.testing._internal.common_utils is used to automatically filter tests for
# sharding on sandcastle. This line silences flake warnings
load_tests = load_tests

AMPERE_OR_ROCM = TEST_WITH_ROCM or tf32_is_not_fp32()

@contextlib.contextmanager
def torch_vital_set(value):
    stash = None
    if 'TORCH_VITAL' in os.environ:
        stash = os.environ['TORCH_VITAL']
    os.environ['TORCH_VITAL'] = value
    try:
        yield
    finally:
        if stash:
            os.environ['TORCH_VITAL'] = stash
        else:
            del os.environ['TORCH_VITAL']

# Tests Vital Signs for Torch
# FIXME: document or deprecate whatever this is
class TestBasicVitalSigns(TestCase):
    def test_basic_vitals(self):
        with torch_vital_set(''):
            self.assertFalse(torch.vitals_enabled())
        with torch_vital_set('ON'):
            self.assertTrue(torch.vitals_enabled())

    def test_basic_vitals_read_write(self):
        with torch_vital_set('ON'):
            self.assertTrue(torch.vitals_enabled())
            # This tests the code path of setting a vital
            self.assertTrue(torch.set_vital('Dataloader', 'basic_unit_test', 'TEST_VALUE_STRING'))
            self.assertIn('TEST_VALUE_STRING', torch.read_vitals())
            self.assertIn('CUDA.used', torch.read_vitals())

    def test_dataloader_vitals(self):
        with torch_vital_set('ON'):
            inps = torch.arange(10 * 5, dtype=torch.float32).view(10, 5)
            tgts = torch.arange(10 * 5, dtype=torch.float32).view(10, 5)
            dataset = torch.utils.data.TensorDataset(inps, tgts)
            loader = torch.utils.data.DataLoader(dataset, batch_size=2)
            self.assertIn('Dataloader.enabled\t\t True', torch.read_vitals())

# FIXME: document or deprecate whatever this is
class TestVitalSignsCuda(TestCase):
    @onlyCUDA
    def test_cuda_vitals_gpu_only(self, device):
        with torch_vital_set('ON'):
            self.assertIn('CUDA.used\t\t true', torch.read_vitals())


is_cuda_sm86 = torch.cuda.is_available() and torch.cuda.get_device_capability(0) == (8, 6)

class TestTorchDeviceType(TestCase):
    exact_dtype = True

    # TODO: move all tensor creation to common ops
    def _rand_shape(self, dim, min_size, max_size):
        shape = []
        for i in range(dim):
            shape.append(random.randint(min_size, max_size))
        return tuple(shape)

    # Validates that mathematical constants are defined properly, as required by
    # the Python Array API (https://data-apis.org/array-api/latest/API_specification/constants.html)
    @onlyCPU
    def test_constants(self, device):
        self.assertIsInstance(torch.e, float)
        self.assertEqual(torch.e, math.e, atol=0, rtol=0)

        self.assertIsInstance(torch.pi, float)
        self.assertEqual(torch.pi, math.pi, atol=0, rtol=0)

        self.assertIsInstance(torch.nan, float)
        self.assertEqual(torch.nan, math.nan, equal_nan=True)

        self.assertIsInstance(torch.inf, float)
        self.assertEqual(torch.inf, math.inf)

    @onlyNativeDeviceTypes
    @dtypes(torch.int8, torch.uint8, torch.int16, torch.int32, torch.int64,
            torch.bool, torch.float32, torch.complex64, torch.float64,
            torch.complex128, torch.uint16, torch.uint32, torch.uint64)
    def test_bytes_to_scalar(self, device, dtype):
        def rand_byte():
            if dtype == torch.bool:
                return torch.randint(0, 2, ()).item()
            else:
                return torch.randint(0, 256, ()).item()

        element_size = torch._utils._element_size(dtype)

        for i in range(10):
            bytes_list = [rand_byte() for _ in range(element_size)]
            scalar = bytes_to_scalar(bytes_list, dtype, device)
            self.assertEqual(scalar.storage().untyped().tolist(), bytes_list)
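
    # Illustrative sketch of the round trip checked above (hypothetical values,
    # assuming a little-endian host):
    #
    #   >>> bytes_to_scalar([0, 0, 128, 63], torch.float32, 'cpu')
    #   tensor(1.)  # the four bytes are 0x3f800000, i.e. float32 1.0
    #
    # and reading scalar.storage().untyped().tolist() yields the same byte list back.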

    @dtypes(torch.int8, torch.uint8, torch.int16, torch.int32, torch.int64,
            torch.bool, torch.float32, torch.complex64, torch.float64,
            torch.complex128, torch.uint16, torch.uint32, torch.uint64)
    def test_storage(self, device, dtype):
        v = make_tensor((3, 5), dtype=dtype, device=device, low=-9, high=9)
        self.assertEqual(v.storage()[0], v[0][0])
        self.assertEqual(v.storage()[14], v[2][4])
        v_s = v.storage()

        for el_num in range(v.numel()):
            dim0 = el_num // v.size(1)
            dim1 = el_num % v.size(1)
            self.assertEqual(
                v_s[el_num],
                v[dim0][dim1])

        v_s_byte = v.storage().untyped()
        el_size = v.element_size()

        for el_num in range(v.numel()):
            start = el_num * el_size
            end = start + el_size
            dim0 = el_num // v.size(1)
            dim1 = el_num % v.size(1)
            self.assertEqual(
                bytes_to_scalar(v_s_byte[start:end], dtype, device),
                v[dim0][dim1])

    @onlyNativeDeviceTypes
    @dtypes(torch.int8, torch.uint8, torch.int16, torch.int32, torch.int64,
            torch.bool, torch.float32, torch.complex64, torch.float64,
            torch.complex128, torch.quint8, torch.qint8, torch.qint32,
            torch.quint4x2)
    def test_storage_setitem(self, device, dtype):
        # Skip quantized dtypes for CUDA, since they're not supported
        if torch.device(device).type == 'cuda':
            if dtype in [torch.quint8, torch.qint8, torch.qint32, torch.quint4x2]:
                return

        storage_type_name = torch.storage._dtype_to_storage_type_map()[dtype]
        if torch.device(device).type == 'cuda':
            storage_type = eval('torch.cuda.' + storage_type_name)
        else:
            storage_type = eval('torch.' + storage_type_name)

        N = 10

        s = storage_type(N)
        s[:] = 0
        l = [0] * N
        self.assertEqual(s, storage_type(l))

        for i in range(N):
            s[i] = i
            l[i] = i

        self.assertEqual(s, storage_type(l))

        l[2:7] = [1] * 5
        s[2:7] = 1
        self.assertEqual(s, storage_type(l))

    @skipIfTorchDynamo("https://github.com/pytorch/torchdynamo/issues/1991")
    @onlyNativeDeviceTypes
    @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
    def test_tensor_storage_type(self, device, dtype):
        a = make_tensor((10,), dtype=dtype, device=device, low=-9, high=9)

        module = torch.cuda if (torch.device(device).type == 'cuda') else torch
        expected_storage_type = getattr(module, torch.storage._dtype_to_storage_type_map()[dtype])

        self.assertEqual(a.storage_type(), expected_storage_type)

    @onlyNativeDeviceTypes
    @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16, torch.uint16, torch.uint32, torch.uint64))
    def test_tensor_from_storage(self, device, dtype):
        a = make_tensor((4, 5, 3), dtype=dtype, device=device, low=-9, high=9)
        a_s = a.storage()
        b = torch.tensor(a_s, device=device, dtype=dtype).reshape(a.size())
        self.assertEqual(a, b)
        c = torch.tensor(a_s.untyped(), device=device, dtype=dtype).reshape(a.size())
        self.assertEqual(a, c)

        for error_dtype in all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16):
            if error_dtype == dtype:
                continue
            with self.assertRaisesRegex(RuntimeError, r'Expected a Storage of type'):
                error_storage = a.to(error_dtype).storage()
                torch.tensor(error_storage, device=device, dtype=dtype)

    @onlyNativeDeviceTypes
    @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
    def test_set_storage(self, device, dtype):
        a = make_tensor((4, 5, 3), dtype=dtype, device=device, low=-9, high=9)
        a_s = a.storage()
        b = torch.tensor([], device=device, dtype=dtype).set_(a_s).reshape(a.size())
        self.assertEqual(a, b)
        c = torch.tensor([], device=device, dtype=dtype).set_(a_s.untyped()).reshape(a.size())
        self.assertEqual(a, c)

        for error_dtype in all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16):
            if error_dtype == dtype:
                continue
            with self.assertRaisesRegex(RuntimeError, r'Expected a Storage of type'):
                error_storage = a.to(error_dtype).storage()
                b = torch.tensor([], device=device, dtype=dtype).set_(error_storage)

    def _check_storage_meta(self, s, s_check):
        self.assertTrue(
            isinstance(s, (torch.UntypedStorage, torch.TypedStorage)) and
            isinstance(s_check, type(s)),
            (
                's and s_check must both be one of UntypedStorage or '
                'TypedStorage, but got'
                f' {type(s).__name__} and {type(s_check).__name__}'))

        self.assertEqual(s.device.type, 'meta')
        self.assertEqual(s.nbytes(), s_check.nbytes())
        self.assertEqual(s.size(), s_check.size())
        self.assertEqual(s.data_ptr(), 0)

        with self.assertRaisesRegex(NotImplementedError, r'Not available'):
            s[0]

        if isinstance(s, torch.TypedStorage):
            self.assertEqual(s.dtype, s_check.dtype)
            self._check_storage_meta(s.untyped(), s_check.untyped())

    @onlyNativeDeviceTypes
    @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
    def test_typed_storage_meta(self, device, dtype):
        args_list = [
            [],
            [0],
            [100],
            [[1, 2, 3, 4, 5, 6]],
        ]
        for args in args_list:
            s_check = torch.TypedStorage(*args, dtype=dtype, device=device)
            s = torch.TypedStorage(*args, dtype=dtype, device='meta')
            self._check_storage_meta(s, s_check)

    @onlyNativeDeviceTypes
    def test_untyped_storage_meta(self, device):
        args_list = [
            [],
            [0],
            [100],
            [[1, 2, 3, 4, 5, 6]],
        ]
        for args in args_list:
            s_check = torch.UntypedStorage(*args, device=device)
            s = torch.UntypedStorage(*args, device='meta')
            self._check_storage_meta(s, s_check)

    @onlyNativeDeviceTypes
    @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
    def test_storage_meta_from_tensor(self, device, dtype):
        t_check = make_tensor((4, 5, 3), dtype=dtype, device=device, low=-9, high=9)
        t = t_check.to('meta')

        s_check = t_check.storage()
        s = t.storage()
        self._check_storage_meta(s, s_check)
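
    # Background for the meta-storage checks in this block (summary, not
    # normative): a 'meta' storage carries only metadata (dtype, size) and owns
    # no bytes, so data_ptr() is 0 and anything that would read or copy data,
    # such as s[0], tolist(), or cpu(), cannot be implemented.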

    @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
    def test_storage_meta_errors(self, device, dtype):
        s0 = torch.TypedStorage([1, 2, 3, 4], device='meta', dtype=dtype)

        with self.assertRaisesRegex(NotImplementedError, r'Cannot copy out'):
            s0.cpu()

        with self.assertRaisesRegex(RuntimeError, r'only available on CPU'):
            s0._share_fd_cpu_()

        with self.assertRaisesRegex(RuntimeError, r'only available on CPU'):
            s0._share_filename_cpu_()

        if torch.cuda.is_available():
            with self.assertRaisesRegex(NotImplementedError, r'Cannot copy out'):
                s0.cuda()

            with self.assertRaisesRegex(RuntimeError, r'only available on CUDA'):
                s0._share_cuda_()

            with self.assertRaisesRegex(TypeError, r"cannot pin 'torch.storage.UntypedStorage' only CPU memory can be pinned"):
                s0.pin_memory()

        with self.assertRaisesRegex(RuntimeError, r'only available on CPU'):
            s0.share_memory_()

        with self.assertRaisesRegex(NotImplementedError, r'Not available'):
            s0.tolist()

        with tempfile.NamedTemporaryFile() as f:
            with self.assertRaisesRegex(NotImplementedError, r'Cannot copy out'):
                s0._write_file(f, True, True, s0.element_size())

        for device in ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']:
            s1 = torch.TypedStorage([1, 2, 3, 4], device=device, dtype=dtype)

            with self.assertRaisesRegex(NotImplementedError, r'Cannot copy out'):
                s1.copy_(s0)

    @onlyCPU
    @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
    def test_storage_meta_ok(self, device, dtype):
        s0 = torch.TypedStorage([1, 2, 3, 4], device='meta', dtype=dtype)

        # This is OK, it changes the meta storage size without allocating
        s0.resize_(10)

    @onlyCUDA
    def test_module_share_memory(self):
        # Test fix for issue #80733
        # See https://github.com/pytorch/pytorch/issues/80733
        model = torch.nn.Linear(3, 1)
        model_cuda = model.to('cuda')
        model.share_memory()

    @dtypes(torch.float32, torch.complex64)
    def test_deepcopy(self, device, dtype):
        from copy import deepcopy
        a = torch.randn(5, 5, dtype=dtype, device=device)
        b = torch.randn(5, 5, dtype=dtype, device=device)
        c = a.view(25)
        q = [a, [a.storage(), b.storage()], b, c]
        w = deepcopy(q)
        self.assertEqual(w[0], q[0], atol=0, rtol=0)
        self.assertEqual(w[1][0], q[1][0], atol=0, rtol=0)
        self.assertEqual(w[1][1], q[1][1], atol=0, rtol=0)
        self.assertEqual(w[1], q[1], atol=0, rtol=0)
        self.assertEqual(w[2], q[2], atol=0, rtol=0)

        # Check that deepcopy preserves sharing
        w[0].add_(1)
        for i in range(a.numel()):
            self.assertEqual(w[1][0][i], q[1][0][i] + 1)
        self.assertEqual(w[3], c + 1)
        w[2].sub_(1)
        for i in range(a.numel()):
            self.assertEqual(w[1][1][i], q[1][1][i] - 1)

        # Check that deepcopy preserves attributes
        a.foo = 3
        self.assertEqual(deepcopy(a).foo, 3)

    @dtypes(torch.float32, torch.complex64)
    def test_deepcopy_scalar(self, device, dtype):
        from copy import deepcopy
        a = torch.tensor(5, dtype=dtype, device=device)
        self.assertEqual(a.size(), deepcopy(a).size())
        self.assertEqual(a, deepcopy(a))

    def check_internal_mem_overlap(self, inplace_op, num_inputs,
                                   dtype, device,
                                   expected_failure=False):
        if isinstance(inplace_op, str):
            inplace_op = getattr(torch.Tensor, inplace_op)
        input = torch.randn(1, dtype=dtype, device=device).expand(3, 3)
        inputs = [input] + [torch.randn_like(input)
                            for i in range(num_inputs - 1)]
        if not expected_failure:
            with self.assertRaisesRegex(RuntimeError, 'single memory location'):
                inplace_op(*inputs)
        else:
            with self.assertRaises(AssertionError):
                with self.assertRaisesRegex(RuntimeError, 'single memory location'):
                    inplace_op(*inputs)
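
    # A sketch of the "internal overlap" input the helper above constructs:
    # expand() produces a view with stride 0 along expanded dimensions, so many
    # logical elements alias one memory location, and in-place ops must refuse
    # it with the 'single memory location' error.
    #
    #   >>> torch.randn(1).expand(3, 3).stride()
    #   (0, 0)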

    def unary_check_input_output_mem_overlap(self, data, sz, op,
                                             expected_failure=False):

        def _test(op, output, input):
            output_exp = torch.empty_like(output)
            op(input, out=output_exp)
            self.assertEqual(op(input, out=output), output_exp, msg=op.__name__)

        # output is identical to input:
        _test(op, output=data[0:sz], input=data[0:sz])
        # output and input are independent:
        _test(op, output=data[0:sz], input=data[sz:2 * sz])
        # output partially overlaps with input:
        if not expected_failure:
            with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
                _test(op, data[0:sz], data[1:sz + 1])
        else:
            with self.assertRaises(AssertionError):
                with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
                    _test(op, data[0:sz], data[1:sz + 1])
        # output is transpose of input:
        length = int(math.sqrt(sz))
        input = data[:length**2].view([length, length])
        out = input.t()
        if not expected_failure:
            with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
                _test(op, out, input)
        else:
            with self.assertRaises(AssertionError):
                with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
                    _test(op, out, input)

    def ternary_check_input_output_mem_overlap(self, op, device,
                                               expected_failure=False):
        sz = 9
        data = torch.randn(2 * sz, device=device)
        other1 = torch.randn(sz, device=device)
        other2 = torch.randn(sz, device=device)

        self.unary_check_input_output_mem_overlap(
            data, sz, lambda input, out:
            op(input, other1.view(input.shape), other2.view(input.shape), out=out),
            expected_failure=expected_failure)

        self.unary_check_input_output_mem_overlap(
            data, sz, lambda input, out:
            op(other1.view(input.shape), input, other2.view(input.shape), out=out),
            expected_failure=expected_failure)

        self.unary_check_input_output_mem_overlap(
            data, sz, lambda input, out:
            op(other1.view(input.shape), other2.view(input.shape), input, out=out),
            expected_failure=expected_failure)

    def _select_broadcastable_dims(self, dims_full=None):
        # select full dimensionality
        if dims_full is None:
            dims_full = []
            ndims = random.randint(1, 4)
            dims_full = [random.randint(1, 8) for _ in range(ndims)]
        else:
            ndims = len(dims_full)

        # select actual dimensions for ops:
        # larger: full ndims, individual sizes may be reduced
        # smaller: possibly reduced ndims, sizes may be reduced
        smaller_ndims = random.randint(1, ndims)
        dims_small = []
        dims_large = []
        for i in range(ndims - 1, -1, -1):
            j = random.randint(1, 3)
            if j == 1:  # no reduced singleton dimension
                ds = dims_full[i]
                dl = dims_full[i]
            elif j == 2:  # larger may have reduced singleton dimension
                ds = dims_full[i]
                dl = 1 if len(dims_small) < smaller_ndims else dims_full[i]
            elif j == 3:  # smaller may have reduced singleton dimension
                ds = 1
                dl = dims_full[i]
            dims_large = [dl] + dims_large
            if len(dims_small) < smaller_ndims:
                dims_small = [ds] + dims_small
        return (dims_small, dims_large, dims_full)
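
    # One possible draw from the helper above (values are random; illustrative
    # only): for dims_full == [4, 2, 6] it might return dims_small == [2, 1]
    # and dims_large == [4, 2, 6]; the smaller shape drops leading dims and/or
    # shrinks sizes to singleton 1, so it still broadcasts to dims_full.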

    # collected tests of ops that used scalar_check in Declarations.cwrap for
    # correctness
    def test_scalar_check(self, device):
        zero_d = torch.randn((), device=device)
        one_d = torch.randn((1,), device=device)

        # remainder
        self.assertEqual((), torch.remainder(zero_d, zero_d).shape)
        self.assertEqual((), torch.remainder(zero_d, 2).shape)
        self.assertEqual((1,), torch.remainder(zero_d, one_d).shape)
        self.assertEqual((1,), torch.remainder(one_d, zero_d).shape)

        # fmod
        self.assertEqual((), torch.fmod(zero_d, zero_d).shape)
        self.assertEqual((), torch.fmod(zero_d, 2).shape)
        self.assertEqual((1,), torch.fmod(zero_d, one_d).shape)
        self.assertEqual((1,), torch.fmod(one_d, zero_d).shape)

        # exp, cos, cosh, tan, atan, tanh, erf, erfc, reciprocal
        self.assertEqual((), torch.exp(zero_d).shape)
        self.assertEqual((), torch.cos(zero_d).shape)
        self.assertEqual((), torch.cosh(zero_d).shape)
        self.assertEqual((), torch.tan(zero_d).shape)
        self.assertEqual((), torch.atan(zero_d).shape)
        self.assertEqual((), torch.acosh(zero_d).shape)
        self.assertEqual((), torch.asinh(zero_d).shape)
        self.assertEqual((), torch.atanh(zero_d).shape)
        self.assertEqual((), torch.tanh(zero_d).shape)
        self.assertEqual((), torch.erf(zero_d).shape)
        self.assertEqual((), torch.erfc(zero_d).shape)
        self.assertEqual((), torch.reciprocal(zero_d).shape)
        self.assertEqual((1,), torch.exp(one_d).shape)
        self.assertEqual((1,), torch.cos(one_d).shape)
        self.assertEqual((1,), torch.cosh(one_d).shape)
        self.assertEqual((1,), torch.tan(one_d).shape)
        self.assertEqual((1,), torch.atan(one_d).shape)
        self.assertEqual((1,), torch.acosh(one_d).shape)
        self.assertEqual((1,), torch.asinh(one_d).shape)
        self.assertEqual((1,), torch.atanh(one_d).shape)
        self.assertEqual((1,), torch.tanh(one_d).shape)
        self.assertEqual((1,), torch.erf(one_d).shape)
        self.assertEqual((1,), torch.erfc(one_d).shape)
        self.assertEqual((1,), torch.reciprocal(one_d).shape)

        # clamp
        self.assertEqual((), torch.clamp(zero_d, min=0, max=1).shape)
        self.assertEqual((), torch.clamp(zero_d, min=0).shape)
        self.assertEqual((), torch.clamp(zero_d, max=1).shape)
        self.assertEqual((1,), torch.clamp(one_d, min=0, max=1).shape)
        self.assertEqual((1,), torch.clamp(one_d, min=0).shape)
        self.assertEqual((1,), torch.clamp(one_d, max=1).shape)

        # cumsum, cumprod, cummax, cummin
        self.assertEqual((), torch.logcumsumexp(zero_d, 0).shape)
        self.assertEqual((), torch.cumsum(zero_d, 0).shape)
        self.assertEqual((), torch.cumprod(zero_d, 0).shape)
        self.assertEqual((), torch.cummax(zero_d, 0)[0].shape)
        self.assertEqual((), torch.cummin(zero_d, 0)[0].shape)

        # sort, topk
        self.assertEqual([(), ()], [x.shape for x in torch.sort(zero_d, 0, False)])
        self.assertEqual([(), ()], [x.shape for x in torch.sort(zero_d, 0, True)])
        self.assertEqual([(), ()], [x.shape for x in torch.topk(zero_d, 1, 0, False)])
        self.assertEqual([(), ()], [x.shape for x in torch.topk(zero_d, 1, 0, True)])

        # max, min
        self.assertEqual((), torch.max(zero_d, zero_d).shape)
        self.assertEqual((1,), torch.max(one_d, zero_d).shape)
        self.assertEqual((1,), torch.max(zero_d, one_d).shape)
        self.assertEqual((), torch.min(zero_d, zero_d).shape)
        self.assertEqual((1,), torch.min(one_d, zero_d).shape)
        self.assertEqual((1,), torch.min(zero_d, one_d).shape)

        zero_d_int = torch.tensor(1, device=device)
        one_d_int = torch.tensor([1], device=device)

        # lshift, rshift
        self.assertEqual((), (zero_d_int >> zero_d_int).shape)
        self.assertEqual((), (zero_d_int >> 1).shape)
        self.assertEqual((1,), (one_d_int >> zero_d_int).shape)
        self.assertEqual((1,), (zero_d_int >> one_d_int).shape)
        self.assertEqual((1,), (one_d_int >> 1).shape)

        self.assertEqual((), (zero_d_int << zero_d_int).shape)
        self.assertEqual((), (zero_d_int << 1).shape)
        self.assertEqual((1,), (one_d_int << zero_d_int).shape)
        self.assertEqual((1,), (zero_d_int << one_d_int).shape)
        self.assertEqual((1,), (one_d_int << 1).shape)

        # or
        self.assertEqual((), (zero_d_int | zero_d_int).shape)
        self.assertEqual((), (zero_d_int | 1).shape)
        self.assertEqual((1,), (one_d_int | zero_d_int).shape)
        self.assertEqual((1,), (zero_d_int | one_d_int).shape)
        self.assertEqual((1,), (one_d_int | 1).shape)

        # and
        self.assertEqual((), (zero_d_int & zero_d_int).shape)
        self.assertEqual((), (zero_d_int & 1).shape)
        self.assertEqual((1,), (one_d_int & zero_d_int).shape)
        self.assertEqual((1,), (zero_d_int & one_d_int).shape)
        self.assertEqual((1,), (one_d_int & 1).shape)

        # clone
        self.assertEqual((), zero_d.clone().shape)

        zero_d_bool = torch.tensor(True, device=device)
        one_d_bool = torch.tensor([True], device=device)

        # masked_select
        self.assertEqual((1,), torch.masked_select(zero_d_bool, zero_d_bool).shape)
        self.assertEqual((1,), torch.masked_select(zero_d_bool, one_d_bool).shape)
        self.assertEqual((1,), torch.masked_select(one_d_bool, zero_d_bool).shape)

        zero_d_uint8 = torch.tensor(1, dtype=torch.uint8, device=device)
        one_d_uint8 = torch.tensor([1], dtype=torch.uint8, device=device)

        # mode
        self.assertEqual([(), ()], [x.shape for x in torch.mode(zero_d, dim=0, keepdim=True)])
        self.assertEqual([(), ()], [x.shape for x in torch.mode(zero_d, dim=0, keepdim=False)])
        self.assertEqual([(1,), (1,)], [x.shape for x in torch.mode(one_d, dim=0, keepdim=True)])
        self.assertEqual([(), ()], [x.shape for x in torch.mode(one_d, dim=0, keepdim=False)])

        # max
        self.assertEqual([(), ()], [x.shape for x in torch.max(zero_d, dim=0, keepdim=True)])
        self.assertEqual([(), ()], [x.shape for x in torch.max(zero_d, dim=0, keepdim=False)])
        self.assertEqual([(1,), (1,)], [x.shape for x in torch.max(one_d, dim=0, keepdim=True)])
        self.assertEqual([(), ()], [x.shape for x in torch.max(one_d, dim=0, keepdim=False)])

        # amax
        self.assertEqual((), torch.amax(zero_d, dim=0, keepdim=True).shape)
        self.assertEqual((), torch.amax(zero_d, dim=0, keepdim=False).shape)
        self.assertEqual((1,), torch.amax(one_d, dim=0, keepdim=True).shape)
        self.assertEqual((), torch.amax(one_d, dim=0, keepdim=False).shape)

        # min
        self.assertEqual([(), ()], [x.shape for x in torch.min(zero_d, dim=0, keepdim=True)])
        self.assertEqual([(), ()], [x.shape for x in torch.min(zero_d, dim=0, keepdim=False)])
        self.assertEqual([(1,), (1,)], [x.shape for x in torch.min(one_d, dim=0, keepdim=True)])
        self.assertEqual([(), ()], [x.shape for x in torch.min(one_d, dim=0, keepdim=False)])

        # amin
        self.assertEqual((), torch.amin(zero_d, dim=0, keepdim=True).shape)
        self.assertEqual((), torch.amin(zero_d, dim=0, keepdim=False).shape)
        self.assertEqual((1,), torch.amin(one_d, dim=0, keepdim=True).shape)
        self.assertEqual((), torch.amin(one_d, dim=0, keepdim=False).shape)

        # set_
        zero_d_clone = zero_d.clone()
        one_d_clone = one_d.clone()
        self.assertEqual((), zero_d_clone.set_(one_d.storage(), 0, (), ()).shape)
        self.assertEqual((1,), zero_d_clone.set_(one_d.storage(), 0, (1,), (1,)).shape)
        self.assertEqual((), one_d_clone.set_(one_d.storage(), 0, (), ()).shape)
        self.assertEqual((1,), one_d_clone.set_(one_d.storage(), 0, (1,), (1,)).shape)

        self.assertEqual((), zero_d.clone().set_(zero_d).shape)
        self.assertEqual((), one_d.clone().set_(zero_d).shape)
        self.assertEqual((1,), zero_d.clone().set_(one_d).shape)
        self.assertEqual((1,), one_d.clone().set_(one_d).shape)

        # take
        self.assertEqual((), torch.randn((2, 3), device=device).take(zero_d_int).shape)
        self.assertEqual((1,), torch.randn((2, 3), device=device).take(one_d_int).shape)

        # gather
        self.assertEqual((), torch.gather(zero_d, 0, torch.zeros((), dtype=torch.int64, device=device)).shape)
        self.assertEqual((1,), torch.gather(zero_d, 0, torch.zeros((1,), dtype=torch.int64, device=device)).shape)
        self.assertEqual((), torch.gather(one_d, 0, torch.zeros((), dtype=torch.int64, device=device)).shape)
        self.assertEqual((1,), torch.gather(one_d, 0, torch.zeros((1,), dtype=torch.int64, device=device)).shape)

        # normal
        # std must be >= 0
        zero_d_ge_0 = torch.rand((), device=device)
        # documentation says out shape matches shape of mean
        self.assertEqual((), torch.normal(zero_d, zero_d_ge_0).shape)
        self.assertEqual((1,), torch.normal(one_d, zero_d_ge_0).shape)
        self.assertEqual((), torch.normal(1, zero_d_ge_0).shape)
        self.assertEqual((), torch.normal(zero_d, 1).shape)
        self.assertEqual((1,), torch.normal(one_d, 1).shape)
        # TODO: this behavior differs on CPU and GPU, see https://github.com/pytorch/pytorch/issues/30480.
        # self.assertEqual((), torch.normal(zero_d, one_d).shape)
        # self.assertEqual((), torch.normal(1, one_d).shape)

        # convolutions. Yes, we are testing nn.functional here; seems justified
        # given it's similar to the other tests
        w = torch.randn(2, 1, 3, 3, device=device).div_(2).requires_grad_()
        self.assertRaises(RuntimeError, lambda: torch.nn.functional.conv2d(zero_d, w, groups=1))
        self.assertRaises(RuntimeError, lambda: torch.nn.functional.conv2d(zero_d, w, groups=2))

        # nll_loss -- verify input can't be 0-dimensional.
        self.assertRaises(ValueError, lambda: torch.nn.functional.nll_loss(zero_d, zero_d, reduction='none'))
        self.assertRaises(ValueError, lambda: torch.nn.functional.nll_loss(zero_d, one_d, reduction='none'))
        # verify output is 0-dimensional when reduction != 'none'
        for (input, target) in ((torch.randn(1, 1, device=device), torch.tensor([0], device=device)),
                                (torch.randn(1, 1, 1, 1, device=device), torch.tensor([[[0]]], device=device))):
            self.assertEqual((), torch.nn.functional.nll_loss(input, target, reduction='mean').shape)
            self.assertEqual((), torch.nn.functional.nll_loss(input, target, reduction='sum').shape)

    # Test that `torch._check_tensor_all` raises errors in the correct cases
    def test_check_tensor_all(self, device):
        default_message = 'Expected cond to be True'
        check_fn = torch._check_tensor_all
        expected_error = RuntimeError

        # cond must be a tensor
        with self.assertRaisesRegex(TypeError, 'cond must be a tensor'):
            check_fn(True)

        # cond tensor must be boolean
        with self.assertRaisesRegex(TypeError, 'cond tensor must have dtype torch.bool'):
            check_fn(torch.ones(1, device=device))

        test_sizes = [
            (),
            (1,),
            (10,),
            (1, 1),
            (1, 10),
            (10, 1),
            (10, 10),
            (1, 1, 1),
            (10, 1, 1),
            (1, 10, 1),
            (10, 10, 10),
        ]
        for size in test_sizes:
            t_all_true = torch.ones(size, dtype=torch.bool, device=device)
            t_all_false = torch.zeros(size, dtype=torch.bool, device=device)

            # Should not raise error
            check_fn(t_all_true)

            with self.assertRaisesRegex(expected_error, default_message):
                check_fn(t_all_false)

            if t_all_true.numel() > 1:
                t_all_true_but_one = t_all_true.clone()
                # Choose a random element to set to false
                idx = (random.choice(range(dim_size)) for dim_size in size)
                t_all_true_but_one[(..., *idx)] = False

                with self.assertRaisesRegex(expected_error, default_message):
                    check_fn(t_all_true_but_one)

            # Test a simple failure message
            message = 'message'
            with self.assertRaisesRegex(expected_error, message):
                check_fn(t_all_false, lambda: message)

            # Test message with tensor
            def message():
                return torch.arange(4)

            with self.assertRaisesRegex(expected_error, re.escape(str(message()))):
                check_fn(t_all_false, message)

            # Test format string message
            def message():
                return f"{'test'} {[1, 2, 'a', True]} {True} {100} {torch.arange(4)}"

            with self.assertRaisesRegex(expected_error, re.escape(str(message()))):
                check_fn(t_all_false, message)

    # Test that `TORCH_CHECK_TENSOR_ALL` raises errors that propagate from C++ to Python
    def test_check_tensor_internal(self, device):
        test_sizes = [
            (),
            (1,),
            (10,),
            (1, 1),
            (1, 10),
            (10, 1),
            (10, 10),
            (1, 1, 1),
            (10, 1, 1),
            (1, 10, 1),
            (10, 10, 10),
        ]
        for size in test_sizes:
            t_all_true = torch.ones(size, dtype=torch.bool, device=device)
            t_all_false = torch.zeros(size, dtype=torch.bool, device=device)

            # Should not raise error
            torch._test_check_tensor(t_all_true)

            with self.assertRaisesRegex(RuntimeError, "Test message for TORCH_CHECK_TENSOR_ALL"):
                torch._test_check_tensor(t_all_false)

            if t_all_true.numel() > 1:
                t_all_true_but_one = t_all_true.clone()
                # Choose a random element to set to false
                idx = (random.choice(range(dim_size)) for dim_size in size)
                t_all_true_but_one[(..., *idx)] = False

                with self.assertRaisesRegex(RuntimeError, "Test message for TORCH_CHECK_TENSOR_ALL"):
                    torch._test_check_tensor(t_all_true_but_one)
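
    # Note on the indexing trick used by both tests above: for size (10, 10),
    # idx generates one random coordinate per dimension, so
    # t_all_true_but_one[(..., *idx)] flips exactly one element, e.g.
    # t[..., 3, 7] = False.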

    # Uses mismatched arange out size to trigger a warning
    @skipIfTorchDynamo("Not a suitable test for TorchDynamo")
    @unittest.skipIf(TEST_WITH_CROSSREF, "crossref perturbs line numbering")
    def test_cpp_warnings_have_python_context(self, device):
        # Creates long string in advance to avoid a too-long Python line
        s = ".+Triggered internally at.+RangeFactories.+"
        # nvfuser deprecation warning filter
        warnings.filterwarnings("ignore", "torch::jit::fuser::cuda", UserWarning)

        def cpp_warn_fn():
            out = torch.empty((5,))
            torch.arange(0, 3, out=out)
            return out

        # Checks eager-mode cpp warning
        with warnings.catch_warnings(record=True) as w:
            cpp_warn_fn()
            frameinfo = inspect.getframeinfo(inspect.currentframe())
            warning = w[0]

        # Checks for cpp context in the warning message
        escaped_warning_message = str(warning.message).encode('unicode_escape')
        self.assertTrue(re.search(s, repr(escaped_warning_message), re.IGNORECASE) is not None)

        # Checks the Python features of the warning
        # Note: the eager mode warning refers to the line in the function
        # that throws the warning.
        self.assertEqual(frameinfo.lineno - 6, warning.lineno)
        self.assertEqual(len(w), 1)

        # Checks jitted cpp warning
        with warnings.catch_warnings(record=True) as w:
            scripted_cpp_warn_fn = torch.jit.script(cpp_warn_fn)
            scripted_cpp_warn_fn()
            warning = w[0]

        # Checks for cpp context in the warning message
        escaped_warning_message = str(warning.message).encode('unicode_escape')
        self.assertTrue(re.search(s, repr(escaped_warning_message), re.IGNORECASE) is not None)

        # Checks the Python features of the warning
        # Note: the jitted warning's lineno refers to the call to the jitted
        # function, which in our test suite has a layer of indirection
        # that makes checking the Python lineno fragile
        self.assertEqual(len(w), 1)

        # Checks jitted Python warning
        def warn_fn():
            warnings.warn("Warning!")

        # The jit mimics an eager-mode Python warning in this case
        with warnings.catch_warnings(record=True) as w:
            scripted_warn_fn = torch.jit.script(warn_fn)
            scripted_warn_fn()
            frameinfo = inspect.getframeinfo(inspect.currentframe())
            warning = w[0]

        self.assertTrue(re.search('Warning!', str(warning.message)) is not None)

        # Checks the Python features of the warning
        self.assertEqual(frameinfo.lineno - 6, warning.lineno)
        self.assertEqual(len(w), 1)

    # FIXME: move to test_testing
    @onlyCPU
    def test_warn_always_caught(self, device):
        # Check that we can catch a TORCH_WARN_ONCE warning twice
        # since assertWarnsOnceRegex uses set_warn_always(True) which changes
        # TORCH_WARN_ONCE to TORCH_WARN
        a = np.arange(10)
        a.flags.writeable = False
        with self.assertWarnsOnceRegex(UserWarning, '.*non-writable.*'):
            torch.from_numpy(a)

        # OK, got it once, now try again
        with self.assertWarnsOnceRegex(UserWarning, '.*non-writable.*'):
            torch.from_numpy(a)

        # Make sure emitting two warnings will pass the assertWarnsOnceRegex
        # context manager
        with self.assertWarnsOnceRegex(UserWarning, '.*non-writable.*'):
            torch.from_numpy(a)
            torch.from_numpy(a)

    @onlyNativeDeviceTypes
    def test_complex_half_experimental_warning(self, device):
        msg = 'ComplexHalf support is experimental'
        with self.assertWarnsOnceRegex(UserWarning, msg):
            t = torch.randn(3, dtype=torch.chalf, device=device)

        with self.assertWarnsOnceRegex(UserWarning, msg):
            torch.rand(3, dtype=torch.chalf, device=device)

        with self.assertWarnsOnceRegex(UserWarning, msg):
            torch.empty(3, dtype=torch.chalf, device=device)

        with self.assertWarnsOnceRegex(UserWarning, msg):
            torch.ones(3, dtype=torch.chalf, device=device)

        with self.assertWarnsOnceRegex(UserWarning, msg):
            torch.zeros(3, dtype=torch.chalf, device=device)

        with self.assertWarnsOnceRegex(UserWarning, msg):
            torch.randn_like(t)

        with self.assertWarnsOnceRegex(UserWarning, msg):
            torch.rand_like(t)

        with self.assertWarnsOnceRegex(UserWarning, msg):
            torch.empty_like(t)

        with self.assertWarnsOnceRegex(UserWarning, msg):
            torch.ones_like(t)

        with self.assertWarnsOnceRegex(UserWarning, msg):
            torch.zeros_like(t)

        with self.assertWarnsOnceRegex(UserWarning, msg):
            # t + 1 allocates a new tensor for result using empty
            t + 1

    @onlyCUDA
    def test_dtypetensor_warnings(self, device):
        msg = 'The torch.cuda.*DtypeTensor constructors are no longer recommended'
        with self.assertWarnsOnceRegex(UserWarning, msg):
            t = torch.cuda.FloatTensor([0])

        with self.assertWarnsOnceRegex(UserWarning, msg):
            t = torch.cuda.DoubleTensor([0])

    def test_set_default_tensor_type_warnings(self, device):
        msg = '.*is deprecated as of PyTorch 2.1, please use torch.set_default_dtype().*'
        default_type = torch.tensor([]).type()
        try:
            with self.assertWarnsOnceRegex(UserWarning, msg):
                torch.set_default_tensor_type(torch.FloatTensor)

            if torch.cuda.is_available():
                with self.assertWarnsOnceRegex(UserWarning, msg):
                    torch.set_default_tensor_type(torch.cuda.FloatTensor)
        finally:
            torch.set_default_tensor_type(default_type)

    # TODO: this test should be in test_nn.py
    def test_conv_transposed_backward_agnostic_to_memory_format(self, device):
        in_channels = 64
        out_channels = 128
        scale_factor = 8
        batch_size = 8
        length = 16

        conv = torch.nn.ConvTranspose1d(
            in_channels, out_channels, kernel_size=scale_factor * 2, stride=scale_factor).to(device)
        layer_norm = torch.nn.LayerNorm(out_channels).to(device)

        input_ = torch.randn(batch_size, in_channels, length).to(device).contiguous()
        input_ = conv(input_).contiguous()
        input_ = layer_norm(input_.transpose(1, 2).contiguous()).contiguous()
        input_.sum().backward()

        # 3d
        conv = torch.nn.ConvTranspose3d(3, 3, kernel_size=3).to(device)
        input = torch.randn(batch_size, 3, length, length, length, device=device)
        out = conv(input)
        out.backward(torch.ones_like(out).transpose(-2, -1))

    # TODO: this test should be in test_nn.py
    @onlyCUDA
    @largeTensorTest('12GB')
    def test_conv_transposed_large(self, device):
        # ConvTranspose3d works for large input tensors (gh-32866)
        in_channels = 64
        out_channels = 128
        kernel_size = 5

        conv = torch.nn.ConvTranspose3d(
            in_channels, out_channels, kernel_size=kernel_size,
            stride=2, padding=2, output_padding=1).to(device)

        x = torch.rand([1, 64, 8, 128, 172]).to(device)
        y = conv(x)
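
    # The test below exercises the set_/is_set_to contract: after
    # t3 = torch.tensor([]).set_(t1), both tensors share one storage and
    # report the same size, stride, and offset, so is_set_to holds in both
    # directions.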

    def test_is_set_to(self, device):
        t1 = torch.empty(3, 4, 9, 10, device=device)
        t2 = torch.empty(3, 4, 9, 10, device=device)
        t3 = torch.tensor([], device=device).set_(t1)
        t4 = t3.clone().resize_(12, 90)
        self.assertFalse(t1.is_set_to(t2))
        self.assertTrue(t1.is_set_to(t3))
        self.assertTrue(t3.is_set_to(t1), "is_set_to should be symmetric")
        self.assertFalse(t1.is_set_to(t4))
        self.assertFalse(torch.tensor([]).is_set_to(torch.tensor([])),
                         "Tensors with no storages should not appear to be set "
                         "to each other")

        t1 = torch.tensor([True, True], dtype=torch.bool, device=device)
        t2 = torch.tensor([0], dtype=torch.bool, device=device).set_(t1)
        self.assertTrue(t1.is_set_to(t2))

        # test that sizes must match
        t1 = torch.empty([2, 3, 4], device=device)
        t2 = t1.view(4, 3, 2)
        self.assertFalse(t1.is_set_to(t2))
        self.assertFalse(t2.is_set_to(t1))

        # test that legacy empty size behavior used to be respected (i.e. all
        # empty tensors were logically collapsed to size [0]).
        t1 = torch.empty([2, 5, 0], device=device)
        t2 = t1.view([0])
        self.assertFalse(t1.is_set_to(t2))
        self.assertFalse(t2.is_set_to(t1))

    # See https://github.com/pytorch/pytorch/issues/72650
    @skipIfMps
    @skipMeta
    @parametrize(
        "fn",
        [
            "dist", "atan2", "pow", "lerp", "add", "sub", "mul", "div", "fmod", "remainder", "eq", "ge", "gt", "le",
            "lt", "max", "min", "ne", "addcdiv", "addcmul", "masked_scatter", "masked_select", "masked_fill", "map",
            "map2", "copy",
        ],
    )
    def test_broadcast(self, fn, device):
        # functions with three tensor arguments
        fns_3_args = {"map2"}
        fns_value_kwarg = {"addcdiv", "addcmul"}

        (dims_small, dims_large, dims_full) = self._select_broadcastable_dims()
        full1d = torch.randn(*dims_full, device=device).flatten().float()
        small = torch.randn(*dims_small, device=device).float()
        large = torch.randn(*dims_large, device=device).float()
        small_expanded = small.expand(*dims_full)
        large_expanded = large.expand(*dims_full)
        small2 = None
        small2_expanded = None
        if fn in fns_3_args or fn in fns_value_kwarg:
            # create another smaller tensor
            (dims_small2, _, _) = self._select_broadcastable_dims(dims_full)
            small2 = torch.randn(*dims_small2, device=device).float()
            small2_expanded = small2.expand(*dims_full)

        if small.is_cuda and fn in ['map', 'map2']:
            # map and map2 are not implemented on CUDA tensors
            return

        if hasattr(large_expanded, fn):
            # run through tensor versions of functions
            # and verify fully expanded inputs give same results
            expanded = {large: large_expanded, small: small_expanded, small2: small2_expanded}

            def tensorfn(myfn, t1, t2):
                if fn == "lerp":
                    return myfn(t1, 0.5)
                elif fn == "masked_select":
                    return myfn(t1 < 0)
                elif fn == "masked_scatter":
                    return myfn(t1 < 0.5, full1d)
                elif fn == "masked_fill":
                    return myfn(t1 < 0.5, 1.0)
                elif fn in fns_3_args:
                    return myfn(1, t1, t2)
                elif fn in fns_value_kwarg:
                    return myfn(t1, t2, value=1)
                else:
                    return myfn(t1)

            # test various orders
            for first, second, third in [(large, small, small2), (small, large, small2),
                                         (small2, small, large), (small2, large, small)]:
                if first is None:
                    break  # ignore last iter when small2 is None
                method_expanded = getattr(expanded[first], fn)
                method = getattr(first, fn)
                r1 = tensorfn(method_expanded, expanded[second], expanded[third])
                r2 = tensorfn(method, second, third)
                self.assertEqual(r1, r2)

        # now for the torch.* versions of functions
        if hasattr(torch, fn):
            fntorch = getattr(torch, fn)
            expanded = {large: large_expanded, small: small_expanded, small2: small2_expanded}

            def torchfn(t1, t2, t3):
                if fn == "lerp":
                    return fntorch(t1, t2, 0.5)
                elif fn == "masked_select":
                    return fntorch(t1, t2 < 0)
                elif fn == "masked_scatter":
                    return fntorch(t1, t2 < 0.5, full1d)
                elif fn == "masked_fill":
                    return fntorch(t1, t2 < 0.5, 1.0)
                elif fn in fns_3_args:
                    return fntorch(t1, 1.0, t2, t3)
                elif fn in fns_value_kwarg:
                    return fntorch(t1, t2, t3, value=1.0)
                else:
                    return fntorch(t1, t2)

            # test various orders
            for first, second, third in [(large, small, small2), (small, large, small2),
                                         (small2, small, large), (small2, large, small)]:
                if first is None:
                    break  # ignore last iter when small2 is None
                r1 = torchfn(expanded[first], expanded[second], expanded[third])
                r2 = torchfn(first, second, third)
                self.assertEqual(r1, r2)

        # now for in place functions
        # in-place tensor is not broadcastable; test only guaranteed
        # to work by broadcasting other argument(s)
        if not hasattr(large_expanded, fn + "_"):
            return

        # need to clone large_expanded so we can reuse, since functions are in-place
        large_expanded_clone = large_expanded.clone()

        def tensorfn_inplace(t0, t1, t2=None):
            t0_fn = getattr(t0, fn + "_")
            if fn == "lerp":
                return t0_fn(t1, 0.5)
            elif fn == "masked_scatter":
                return t0_fn(t1 < 0.5, full1d)
            elif fn == "masked_fill":
                return t0_fn(t1 < 0.5, 1.0)
            elif fn == "map":
                return t0_fn(t1, lambda x, y: x + y)
            elif fn == "map2":
                return t0_fn(t1, t2, lambda x, y, z: x + y + z)
            elif fn in fns_3_args:
                return t0_fn(1.0, t1, t2)
            elif fn in fns_value_kwarg:
                return t0_fn(t1, t2, value=1.0)
            else:
                return t0_fn(t1)

        # in-place pointwise operations don't actually work if the in-place
        # tensor is 0-strided (numpy has the same issue)
        if (0 not in large_expanded.stride() and 0 not in large_expanded_clone.stride()):
            r1 = tensorfn_inplace(large_expanded, small_expanded, small2_expanded)
            r2 = tensorfn_inplace(large_expanded_clone, small, small2)
            self.assertEqual(r1, r2)

        def broadcastable(t0, t1, t2=None):
            try:
                t1.expand_as(t0)
                if t2 is not None:
                    t2.expand_as(t0)
            except RuntimeError:
                return False
            return True

        def _test_in_place_broadcastable(t0, t1, t2=None):
            if not broadcastable(t0, t1, t2):
                same_size = t0.numel() == t1.numel() and (t0.numel() == t2.numel() if t2 is not None else True)
                if not same_size:
                    # Functionalization converts the inplace to an out-of-place, which causes us to error.
                    # We should fix this, but "error probably on bad inputs" isn't a hi-pri PT2 item.
                    if not TEST_WITH_TORCHINDUCTOR:
                        self.assertRaises(RuntimeError, lambda: tensorfn_inplace(t0, t1, t2))
            else:
                tensorfn_inplace(t0, t1, t2)

        if fn not in fns_3_args and fn not in fns_value_kwarg:
            _test_in_place_broadcastable(small, large_expanded)
            _test_in_place_broadcastable(small, large)
        else:
            _test_in_place_broadcastable(small2, small_expanded, large_expanded)
            _test_in_place_broadcastable(small2, small, large)

    @unittest.skipIf(IS_FBCODE and IS_REMOTE_GPU, "cublas runtime error")
    @onlyCUDA
    @wrapDeterministicFlagAPITest
    def test_cublas_config_nondeterministic_alert(self, device):
        test_cases = [
            # (function, (tensor sizes))
            ('mm', ((2, 2), (2, 2),)),
            ('mv', ((2, 2), (2,),)),
            ('bmm', ((1, 2, 2), (1, 2, 2),))]

        test_configs = [
            # (CuBLAS workspace config, is deterministic)
            ('garbage', False),
            (None, False),
            (':4096:8', True),
            (':16:8', True)]

        cublas_var_name = 'CUBLAS_WORKSPACE_CONFIG'
        is_cuda10_2_or_higher = (
            (torch.version.cuda is not None)
            and ([int(x) for x in torch.version.cuda.split(".")] >= [10, 2]))

        def test_case_info(fn_name, config):
            return f'function "{fn_name}" with config "{"" if config is None else config}"'

        # Create processes to test each combination of test cases and config settings
        processes = []
        for fn_name, arg_sizes in test_cases:
            for config, is_config_deterministic in test_configs:
                env = os.environ.copy()
                if config is None:
                    if env.get(cublas_var_name) is not None:
                        del env[cublas_var_name]
                else:
                    env[cublas_var_name] = config
                should_throw_error = is_cuda10_2_or_higher and not is_config_deterministic
                script = f"""
import torch
torch.use_deterministic_algorithms(True)
fn = torch.{fn_name}
arg_sizes = {arg_sizes}
device = '{device}'
should_throw_error = {should_throw_error}
args = []
for arg_size in arg_sizes:
    args.append(torch.randn(*arg_size, device=device))
try:
    fn(*args)
except RuntimeError as e:
    if not should_throw_error:
        raise RuntimeError('Did not expect any error to be raised')
    elif 'Deterministic behavior was enabled with either' not in str(e):
        raise RuntimeError('Expected a CuBLAS nondeterministic error, but got a different error')
else:
    if should_throw_error:
        raise RuntimeError('Expected a CuBLAS nondeterministic error, but it was not raised')

"""
                try:
                    subprocess.check_output(
                        [sys.executable, '-c', script],
                        stderr=subprocess.STDOUT,
                        # On Windows, opening the subprocess with the default CWD makes `import torch`
                        # fail, so just set CWD to this script's directory
                        cwd=os.path.dirname(os.path.realpath(__file__)),
                        env=env)
                except subprocess.CalledProcessError as e:
                    self.fail(msg=(
                        f'Subprocess exception while attempting to run {test_case_info(fn_name, config)}:\n'
                        + e.output.decode("utf-8")))

    @onlyCPU
    @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707")
    @dtypes(*get_all_qint_dtypes())
    def test_nondeterministic_resize_quantized(self, device, dtype):
        a = torch.tensor([-1, 0, 1, 2, 3], dtype=torch.float, device=device)
        b = torch.quantize_per_tensor(a, 0.1, 10, dtype)
        self.check_nondeterministic_alert(
            lambda: b.resize_((10,)),
            'quantized_resize_cpu_')
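
    # Summary the deterministic-fill tests below rely on (restating the
    # comments in the tests themselves): under DeterministicGuard(True,
    # fill_uninitialized_memory=True), newly allocated or newly grown storage
    # is filled with NaN for floating-point/complex dtypes and with the
    # dtype's maximum value for integer/bool dtypes.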

    @skipXLA
    @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707")
    @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16, torch.uint16, torch.uint32, torch.uint64))
    def test_deterministic_resize(self, device, dtype):
        test_cases = [
            # size, stride, resize_size
            ((10,), (1,), (5,)),
            ((10,), (0,), (10,)),
            ((10,), (1,), (20,)),
            ((2, 3, 4), None, (2, 3, 4)),
            ((2, 3, 4), None, (6, 3, 4)),
            ((2, 3, 4), None, (2, 5, 4)),
            ((2, 3, 4), None, (2, 3, 6)),
            ((2, 3, 4), None, (3, 4, 5)),
            ((2, 3, 4), (1, 4, 12), (2, 3, 4)),
            ((2, 3, 4), (1, 4, 12), (4, 3, 4)),
            ((2, 3, 4), (1, 4, 12), (2, 4, 4)),
            ((2, 3, 4), (1, 4, 12), (2, 3, 5)),
            ((2, 3, 4), (1, 4, 12), (3, 4, 5)),
            ((2, 3, 4), (1, 0, 1), (2, 4, 5)),
        ]

        for size, stride, resize_size in test_cases:
            if stride is None:
                a = torch.zeros(size, dtype=dtype, device=device)
            else:
                a = torch.empty_strided(size, stride, dtype=dtype, device=device).fill_(0)
            old_storage = a.untyped_storage().clone()
            with DeterministicGuard(True, fill_uninitialized_memory=True):
                a.resize_(resize_size)

            new_storage = a.untyped_storage()

            # If storage size was increased, check that the new section is
            # filled with NaN/MAX_INT. Otherwise, check that the storages are
            # equal.
            old_tensor = torch.tensor(old_storage, dtype=dtype)
            old_numel = old_tensor.numel()
            new_tensor = torch.tensor(new_storage, dtype=dtype)
            new_numel = new_tensor.numel()

            if new_numel > old_numel:
                self.assertEqual(new_tensor[:old_numel], old_tensor)
                fill_section = new_tensor[old_numel:]

                if dtype.is_floating_point or dtype.is_complex:
                    self.assertTrue(fill_section.isnan().all())
                else:
                    if dtype == torch.bool:
                        max_val = True
                    else:
                        max_val = torch.iinfo(dtype).max
                    self.assertTrue(fill_section.eq(max_val).all())
            else:
                self.assertEqual(old_tensor, new_tensor)

    # When deterministic algorithms are enabled, `torch.empty` should fill floating
    # point tensors with NaN and integer tensors with MAX_INT
    @skipXLA
    @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707")
    @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16, torch.uint16, torch.uint32, torch.uint64))
    def test_deterministic_empty(self, device, dtype):
        gen_fns = [
            lambda: torch.empty(10, 9, device=device, dtype=dtype),
            lambda: torch.empty(10, 9, out=torch.zeros(1, device=device, dtype=dtype)),
            lambda: torch.empty_like(torch.zeros(10, 9, device=device, dtype=dtype)),
            lambda: torch.empty_like(torch.zeros(10, 9, device=device, dtype=dtype), memory_format=torch.contiguous_format),
            lambda: torch.empty_strided((10, 9), (1, 5), device=device, dtype=dtype),
            lambda: torch.empty_permuted((2, 3, 5), (1, 0, 2), device=device, dtype=dtype),
        ]

        for gen_fn in gen_fns:
            with DeterministicGuard(True, fill_uninitialized_memory=True):
                res = gen_fn()

            if dtype.is_floating_point or dtype.is_complex:
                self.assertTrue(res.isnan().all())
            else:
                if dtype == torch.bool:
                    max_val = True
                else:
                    max_val = torch.iinfo(dtype).max
                self.assertTrue(res.eq(max_val).all())
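
    # Rough shape of the alert tests that follow: check_nondeterministic_alert
    # (inherited from the common_utils TestCase) invokes the given callable
    # with deterministic algorithms enabled and asserts that the kernel named
    # in the second argument raises (or, when the final flag is False, does
    # not raise) the nondeterministic-operation error.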

    # FIXME: update OpInfos to support "nondeterministic samples" and port these tests
    # to that architecture
    @skipIfMps
    @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707")
    def test_nondeterministic_alert_AvgPool3d(self, device):
        module = torch.nn.AvgPool3d(3)
        input = torch.randn(2, 3, 3, 3, requires_grad=True, device=device)
        res = module(input)
        grad = torch.ones_like(res)

        self.check_nondeterministic_alert(
            lambda: res.backward(grad, retain_graph=True),
            'avg_pool3d_backward_cuda',
            torch.device(device).type == 'cuda')

    @skipIfMps
    @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707")
    def test_nondeterministic_alert_AdaptiveAvgPool2d(self, device):
        module = torch.nn.AdaptiveAvgPool2d(3)
        input = torch.randn(2, 3, 3, requires_grad=True, device=device)
        res = module(input)
        grad = torch.ones_like(res)

        self.check_nondeterministic_alert(
            lambda: res.backward(grad, retain_graph=True),
            'adaptive_avg_pool2d_backward_cuda',
            torch.device(device).type == 'cuda')

    @skipIfMps
    @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707")
    def test_nondeterministic_alert_AdaptiveAvgPool3d(self, device):
        module = torch.nn.AdaptiveAvgPool3d(3)
        input = torch.randn(2, 3, 3, 3, requires_grad=True, device=device)
        res = module(input)
        grad = torch.ones_like(res)

        self.check_nondeterministic_alert(
            lambda: res.backward(grad, retain_graph=True),
            'adaptive_avg_pool3d_backward_cuda',
            torch.device(device).type == 'cuda')

    @skipIfMps
    @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707")
    def test_nondeterministic_alert_MaxPool3d(self, device):
        module = torch.nn.MaxPool3d(3)
        input = torch.randn(2, 3, 3, 3, requires_grad=True, device=device)
        res = module(input)
        grad = torch.ones_like(res)

        self.check_nondeterministic_alert(
            lambda: res.backward(grad, retain_graph=True),
            'max_pool3d_with_indices_backward_cuda',
            torch.device(device).type == 'cuda')

    @skipIfMps
    @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707")
    def test_nondeterministic_alert_AdaptiveMaxPool2d(self, device):
        module = torch.nn.AdaptiveMaxPool2d(3)
        input = torch.randn(2, 3, 3, requires_grad=True, device=device)
        res = module(input)
        grad = torch.ones_like(res)

        self.check_nondeterministic_alert(
            lambda: res.backward(grad, retain_graph=True),
            'adaptive_max_pool2d_backward_cuda',
            torch.device(device).type == 'cuda')

    @skipIfMps
    @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707")
    def test_nondeterministic_alert_FractionalMaxPool2d(self, device):
        module = torch.nn.FractionalMaxPool2d(2, output_ratio=0.5)
        input = torch.randn(2, 3, 3, 3, requires_grad=True, device=device)
        res = module(input)
        grad = torch.ones_like(res)

        self.check_nondeterministic_alert(
            lambda: res.backward(grad, retain_graph=True),
            'fractional_max_pool2d_backward_cuda',
            torch.device(device).type == 'cuda')

    @skipIfMps
    @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707")
    def test_nondeterministic_alert_FractionalMaxPool3d(self, device):
        module = torch.nn.FractionalMaxPool3d(2, output_ratio=0.5)
        input = torch.randn(2, 3, 3, 3, 3, requires_grad=True, device=device)
        res = module(input)
        grad = torch.ones_like(res)

        self.check_nondeterministic_alert(
            lambda: res.backward(grad, retain_graph=True),
            'fractional_max_pool3d_backward_cuda',
            torch.device(device).type == 'cuda')
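
    # Note for the MaxUnpool tests below: the 1d module dispatches to the
    # shared 2d kernel, which is why the MaxUnpool1d test checks for
    # 'max_unpooling2d_forward_out' rather than a 1d-specific name.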

    @dtypes(*floating_types_and(torch.half))
    @onlyNativeDeviceTypes
    def test_nondeterministic_alert_MaxUnpool1d(self, device, dtype):
        if dtype == torch.half and torch.device(device).type == 'cpu':
            self.skipTest('float16 not implemented on CPU')

        module = torch.nn.MaxUnpool1d(3, 1)
        input = torch.randn(1, 1, 7, dtype=dtype, device=device)
        indices = torch.zeros_like(input, dtype=torch.long, device=device)

        self.check_nondeterministic_alert(
            lambda: module(input, indices),
            'max_unpooling2d_forward_out')

    @dtypes(*floating_types_and(torch.half))
    @onlyNativeDeviceTypes
    def test_nondeterministic_alert_MaxUnpool2d(self, device, dtype):
        if dtype == torch.half and torch.device(device).type == 'cpu':
            self.skipTest('float16 not implemented on CPU')

        module = torch.nn.MaxUnpool2d(3, 1)
        input = torch.randn(1, 1, 7, 7, dtype=dtype, device=device)
        indices = torch.zeros_like(input, dtype=torch.long, device=device)

        self.check_nondeterministic_alert(
            lambda: module(input, indices),
            'max_unpooling2d_forward_out')

    @dtypes(*floating_types_and(torch.half))
    @onlyNativeDeviceTypes
    def test_nondeterministic_alert_MaxUnpool3d(self, device, dtype):
        if dtype == torch.half and torch.device(device).type == 'cpu':
            self.skipTest('float16 not implemented on CPU')

        module = torch.nn.MaxUnpool3d(3, 1)
        input = torch.randn(1, 1, 7, 7, 7, dtype=dtype, device=device)
        indices = torch.zeros_like(input, dtype=torch.long, device=device)

        self.check_nondeterministic_alert(
            lambda: module(input, indices),
            'max_unpooling3d_forward_out')

    @skipIfMps
    @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707")
    def test_nondeterministic_alert_interpolate_linear(self, device):
        input = torch.randn(1, 2, 4, device=device, requires_grad=True)
        res = torch.nn.functional.interpolate(
            input,
            size=12,
            mode='linear',
            align_corners=False)
        grad = torch.ones_like(res)

        self.check_nondeterministic_alert(
            lambda: res.backward(grad),
            'upsample_linear1d_backward_out_cuda',
            torch.device(device).type == 'cuda')

    @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707")
    def test_nondeterministic_alert_interpolate_bilinear(self, device):
        input = torch.randn(1, 2, 4, 4, device=device, requires_grad=True)
        res = torch.nn.functional.interpolate(
            input,
            size=12,
            mode='bilinear',
            align_corners=False)
        grad = torch.ones_like(res)

        self.check_nondeterministic_alert(
            lambda: res.backward(grad),
            'upsample_bilinear2d_backward_out_cuda',
            torch.device(device).type == 'cuda')
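
    # The deterministic tests below complement the alert tests above: rather
    # than expecting an error, they run the op under DeterministicGuard(True)
    # and require bitwise-identical gradients (atol=0, rtol=0) across runs.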
torch.nn.functional.pad( 1551 input, 1552 padding, 1553 mode='replicate') 1554 res.backward(torch.ones_like(res)) 1555 if grad is None: 1556 grad = input.grad 1557 else: 1558 self.assertEqual(grad, input.grad, atol=0, rtol=0) 1559 input.grad = None 1560 1561 @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707") 1562 def test_deterministic_interpolate_bilinear(self, device): 1563 input = torch.randn(1, 2, 4, 4, device=device, requires_grad=True) 1564 grad = None 1565 with DeterministicGuard(True): 1566 for _ in range(5): 1567 res = torch.nn.functional.interpolate( 1568 input, 1569 size=12, 1570 mode='bilinear', 1571 align_corners=False) 1572 res.backward(torch.ones_like(res)) 1573 if grad is None: 1574 grad = input.grad 1575 else: 1576 self.assertEqual(grad, input.grad, atol=0, rtol=0) 1577 input.grad = None 1578 1579 @skipIfMps 1580 @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707") 1581 def test_nondeterministic_alert_interpolate_bicubic(self, device): 1582 input = torch.randn(1, 2, 4, 4, device=device, requires_grad=True) 1583 res = torch.nn.functional.interpolate( 1584 input, 1585 size=12, 1586 mode='bicubic', 1587 align_corners=False) 1588 grad = torch.ones_like(res) 1589 1590 self.check_nondeterministic_alert( 1591 lambda: res.backward(grad), 1592 'upsample_bicubic2d_backward_out_cuda', 1593 torch.device(device).type == 'cuda') 1594 1595 @skipIfMps 1596 @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707") 1597 def test_nondeterministic_alert_interpolate_trilinear(self, device): 1598 input = torch.randn(1, 2, 4, 4, 4, device=device, requires_grad=True) 1599 res = torch.nn.functional.interpolate( 1600 input, 1601 size=12, 1602 mode='trilinear', 1603 align_corners=False) 1604 grad = torch.ones_like(res) 1605 1606 self.check_nondeterministic_alert( 1607 lambda: res.backward(grad), 1608 'upsample_trilinear3d_backward_out_cuda', 1609 torch.device(device).type == 'cuda') 1610 1611 @skipIfMps 1612 @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707") 1613 def test_nondeterministic_alert_ReflectionPad1d(self, device): 1614 module = torch.nn.ReflectionPad1d((1, 2)) 1615 input = torch.randn(2, 3, 8, device=device, requires_grad=True) 1616 res = module(input) 1617 grad = torch.ones_like(res) 1618 1619 self.check_nondeterministic_alert( 1620 lambda: res.backward(grad, retain_graph=True), 1621 'reflection_pad1d_backward_out_cuda', 1622 torch.device(device).type == 'cuda') 1623 1624 @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707") 1625 def test_nondeterministic_alert_ReflectionPad2d(self, device): 1626 module = torch.nn.ReflectionPad2d((1, 2, 3, 4)) 1627 input = torch.randn(2, 3, 8, 8, device=device, requires_grad=True) 1628 res = module(input) 1629 grad = torch.ones_like(res) 1630 1631 self.check_nondeterministic_alert( 1632 lambda: res.backward(grad, retain_graph=True), 1633 'reflection_pad2d_backward_cuda', 1634 torch.device(device).type == 'cuda') 1635 1636 @skipIfMps 1637 @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707") 1638 def test_nondeterministic_alert_ReflectionPad3d(self, device): 1639 module = torch.nn.ReflectionPad3d((1, 2, 3, 4, 5, 6)) 1640 input = torch.randn(2, 3, 8, 8, 8, device=device, requires_grad=True) 1641 res = module(input) 1642 grad = torch.ones_like(res) 1643 1644 self.check_nondeterministic_alert( 1645 lambda: res.backward(grad, retain_graph=True), 1646 'reflection_pad3d_backward_out_cuda', 1647 torch.device(device).type == 'cuda') 1648 1649 
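    # The alert tests below all share one pattern: check_nondeterministic_alert
    # (a helper defined earlier in this class) invokes the given callable and
    # asserts that a nondeterministic-alert RuntimeError naming the listed
    # kernel is raised exactly when the final boolean argument is True
    # (typically: only when running on CUDA). A minimal sketch of the behavior
    # being exercised, assuming a CUDA device:
    #
    #   torch.use_deterministic_algorithms(True)
    #   inp = torch.randn(2, 3, 4, device='cuda', requires_grad=True)
    #   res = torch.nn.ReplicationPad1d((1, 2))(inp)
    #   res.backward(torch.ones_like(res))  # raises, naming
    #                                       # replication_pad1d_backward_cuda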
@skipIfMps 1650 @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707") 1651 def test_nondeterministic_alert_ReplicationPad1d(self, device): 1652 module = torch.nn.ReplicationPad1d((1, 2)) 1653 input = torch.randn(2, 3, 4, device=device, requires_grad=True) 1654 res = module(input) 1655 grad = torch.ones_like(res) 1656 1657 self.check_nondeterministic_alert( 1658 lambda: res.backward(grad, retain_graph=True), 1659 'replication_pad1d_backward_cuda', 1660 torch.device(device).type == 'cuda') 1661 1662 @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707") 1663 def test_nondeterministic_alert_ReplicationPad2d(self, device): 1664 module = torch.nn.ReplicationPad2d((1, 2, 3, 4)) 1665 input = torch.randn(2, 3, 4, 4, device=device, requires_grad=True) 1666 res = module(input) 1667 grad = torch.ones_like(res) 1668 1669 # Nondeterministic alert should only be raised if the forward call was 1670 # nondeterministic 1671 self.check_nondeterministic_alert( 1672 lambda: res.backward(grad, retain_graph=True), 1673 'replication_pad2d_backward_cuda', 1674 torch.device(device).type == 'cuda') 1675 1676 with DeterministicGuard(True): 1677 res = module(input) 1678 1679 grad = torch.ones_like(res) 1680 1681 # If the forward call was deterministic, nondeterministic alert should 1682 # not be raised 1683 self.check_nondeterministic_alert( 1684 lambda: res.backward(grad, retain_graph=True), 1685 'replication_pad2d_backward_cuda', 1686 False) 1687 1688 @skipIfMps 1689 @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707") 1690 def test_nondeterministic_alert_ReplicationPad3d(self, device): 1691 module = torch.nn.ReplicationPad3d((1, 2, 3, 4, 5, 6)) 1692 input = torch.randn(2, 3, 4, 4, 4, device=device, requires_grad=True) 1693 res = module(input) 1694 grad = torch.ones_like(res) 1695 1696 self.check_nondeterministic_alert( 1697 lambda: res.backward(grad, retain_graph=True), 1698 'replication_pad3d_backward_cuda', 1699 torch.device(device).type == 'cuda') 1700 1701 @skipIfTorchDynamo("Warning is not raised.") 1702 def test_nondeterministic_alert_NLLLoss(self, device): 1703 module = torch.nn.NLLLoss() 1704 input = torch.randn(2, 3, 5, 5, device=device) 1705 target = torch.rand(2, 5, 5, device=device).mul(3).floor().long() 1706 1707 1708 self.check_nondeterministic_alert( 1709 lambda: module(input, target), 1710 'nll_loss2d_forward_out_cuda_template', 1711 torch.device(device).type == 'cuda') 1712 1713 @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707") 1714 def test_nondeterministic_alert_CTCLoss(self, device): 1715 module = torch.nn.CTCLoss() 1716 input = torch.randn(50, 3, 15, device=device, requires_grad=True) 1717 target = torch.randint(0, 14, (3, 30), device=device) 1718 input_lengths = [50, 50, 50] 1719 target_lengths = [30, 25, 20] 1720 res = module(input, target, input_lengths, target_lengths) 1721 grad = torch.ones_like(res) 1722 1723 self.check_nondeterministic_alert( 1724 lambda: res.backward(grad, retain_graph=True), 1725 'ctc_loss_backward_gpu', 1726 torch.device(device).type == 'cuda') 1727 1728 @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707") 1729 def test_nondeterministic_alert_EmbeddingBag_max(self, device): 1730 module = torch.nn.EmbeddingBag( 1731 4, 3, None, 2., False, 'max', 1732 _weight=torch.randn(4, 3, device=device, requires_grad=True)) 1733 input = torch.randint(0, 3, (4, 3), device=device) 1734 res = module(input) 1735 grad = torch.ones_like(res) 1736 1737 self.check_nondeterministic_alert( 
lambda: res.backward(grad, retain_graph=True),
            'embedding_bag_backward_cuda_max',
            torch.device(device).type == 'cuda')

    @dtypes(*all_types_and_complex_and(torch.bool))
    @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707")
    def test_nondeterministic_alert_cumsum(self, device, dtype):
        input = make_tensor((10,), dtype=dtype, device=device, low=-9, high=9)
        should_alert = torch.device(device).type == 'cuda' and (dtype.is_floating_point or dtype.is_complex)

        for op_call in [torch.Tensor.cumsum, torch.cumsum]:
            self.check_nondeterministic_alert(
                lambda: op_call(input, 0),
                'cumsum_cuda_kernel',
                should_alert)

    @expectedFailureMeta  # expected a non-deterministic error, but it was not raised
    @onlyNativeDeviceTypes
    def test_nondeterministic_alert_put(self, device):
        a = torch.randn(10, device=device)
        indices = torch.tensor([0, 0], device=device)
        values = torch.tensor([0., 1.], device=device)

        for op_call in [torch.Tensor.put, torch.Tensor.put_]:
            self.check_nondeterministic_alert(
                lambda: op_call(a, indices, values, accumulate=False),
                'put_')

    # warn_only=False correctly raises RuntimeError: put_ does not have a deterministic implementation
    # warn_only=True logs warning from the FallbackKernel: torch.ops.aten.put_.default, instead of as UserWarning:
    # [W Context.cpp:%(lineno)] Warning: put_ does not have a deterministic implementation
    @skipIfTorchInductor("warning is logged from the FallbackKernel: torch.ops.aten.put_.default when warn_only=True")
    def test_nondeterministic_alert_put_accumulate(self, device):
        a = torch.randn(10, device=device)
        indices = torch.tensor([0, 0], device=device)
        values = torch.tensor([0., 1.], device=device)

        for op_call in [torch.Tensor.put, torch.Tensor.put_]:
            self.check_nondeterministic_alert(
                lambda: op_call(a, indices, values, accumulate=True),
                'put_',
                torch.device(device).type == 'cuda')

    @skipIfMps
    def test_nondeterministic_alert_histc(self, device):
        a = torch.tensor([], device=device)
        for op_call in [torch.histc, torch.Tensor.histc]:
            self.check_nondeterministic_alert(
                lambda: op_call(a, min=0, max=3),
                '_histc_cuda',
                torch.device(device).type == 'cuda')

    @skipIfMps
    def test_nondeterministic_alert_bincount(self, device):
        a = torch.tensor([], device=device, dtype=torch.long)
        weights = torch.tensor([], device=device)

        for op_call in [torch.bincount, torch.Tensor.bincount]:
            # Error should only be raised when device is CUDA and weights are
            # given
            self.check_nondeterministic_alert(
                lambda: op_call(a, weights),
                '_bincount_cuda',
                torch.device(device).type == 'cuda')

            self.check_nondeterministic_alert(
                lambda: op_call(a),
                '_bincount_cuda',
                False)

    # Ensures that kthvalue throws nondeterministic alerts in the correct cases
    @dtypes(torch.double)
    def test_nondeterministic_alert_kthvalue(self, device, dtype):
        def test_func(call_type):
            S = 10
            k = 5
            a = torch.randn(S, device=device)
            if call_type == 'function':
                torch.kthvalue(a, k)
            elif call_type == 'method':
                a.kthvalue(k)
            elif call_type == 'out':
                values = torch.empty_like(a)
                indices = torch.empty((), device=device, dtype=torch.long)
                torch.kthvalue(a, k, out=(values, indices))
            else:
                self.fail(f"'{call_type}' is not a valid call type")

        # exercise every call type, not just 'function'
        for call_type in ['function', 'method', 'out']:
            self.check_nondeterministic_alert(
                lambda: test_func(call_type),
                'kthvalue CUDA',
                torch.device(device).type == 'cuda')

    @skipIfMps
    @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707")
    def test_nondeterministic_alert_grid_sample_2d(self, device):
        input = torch.empty(1, 1, 2, 2, device=device, requires_grad=True)
        grid = torch.empty(1, 1, 1, 2, device=device)
        res = torch.nn.functional.grid_sample(input, grid, align_corners=False)
        grad = torch.ones_like(res)

        self.check_nondeterministic_alert(
            lambda: res.backward(grad, retain_graph=True),
            'grid_sampler_2d_backward_cuda',
            torch.device(device).type == 'cuda')

    @skipIfMps
    @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/113707")
    def test_nondeterministic_alert_grid_sample_3d(self, device):
        input = torch.empty(1, 1, 2, 2, 2, device=device, requires_grad=True)
        grid = torch.empty(1, 1, 1, 2, 3, device=device)
        res = torch.nn.functional.grid_sample(input, grid, align_corners=False)
        grad = torch.ones_like(res)

        self.check_nondeterministic_alert(
            lambda: res.backward(grad, retain_graph=True),
            'grid_sampler_3d_backward_cuda',
            torch.device(device).type == 'cuda')

    def test_invalid_shapes_grid_sampler(self, device):
        make_arg = partial(
            make_tensor, device=device, dtype=torch.float64, requires_grad=True)

        inputs = (
            # input, grid
            ((5, 5, 5, 5, 5,), (1, 1, 1, 4, 4,)),  # 3d
            ((5, 5, 5, 5,), (1, 1, 4, 4,)),  # 2d
        )

        interpolation_mode = 0
        padding_mode = 0
        align_corners = True

        err = "expected grid and input to have same batch size"

        for input, grid in inputs:
            input = make_arg(input)
            grid = make_arg(grid, low=-1, high=1)

            # Wrapper for the 2d, 3d, and cuDNN functions listed below.
            with self.assertRaisesRegex(RuntimeError, err):
                torch.grid_sampler(
                    input, grid, interpolation_mode, padding_mode,
                    align_corners)

            # Expects 2d input.
            with self.assertRaisesRegex(RuntimeError, err):
                torch.grid_sampler_2d(
                    input, grid, interpolation_mode, padding_mode,
                    align_corners)

            # Expects 3d input.
            with self.assertRaisesRegex(RuntimeError, err):
                torch.grid_sampler_3d(
                    input, grid, interpolation_mode, padding_mode,
                    align_corners)

            # Expects 2d input.
            with self.assertRaisesRegex(RuntimeError, err):
                torch._grid_sampler_2d_cpu_fallback(
                    input, grid, interpolation_mode, padding_mode,
                    align_corners)

            # Expects 2d input, on CUDA.
            # Doesn't work on CPU and ROCm.
            if device != 'cpu' and TEST_CUDNN and not TEST_WITH_ROCM:
                with self.assertRaisesRegex(RuntimeError, err):
                    torch.cudnn_grid_sampler(input, grid)

    def test_dist(self, device):
        def run_test(x, y):
            for p in [0, 1, 2, 3, 4, inf, -inf]:
                dist_xy = torch.dist(x, y, p)
                dist_xy_norm = torch.norm(x - y, p)
                self.assertEqual(dist_xy, dist_xy_norm)

        run_test(torch.randn(5, device=device), torch.randn(5, device=device))

        x = torch.zeros(3, device=device)
        y = torch.zeros(3, device=device)
        y[1] = 1.
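        # x and y differ in exactly one element here, which also exercises the
        # p=0 case (dist counts the nonzero entries of x - y) on an otherwise
        # all-zero difference.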
1920 run_test(x, y) 1921 1922 # Ensures that median throws nondeterministic alerts in the correct cases 1923 @dtypes(torch.double) 1924 def test_nondeterministic_alert_median(self, device, dtype): 1925 def test_func(call_type): 1926 S = 10 1927 a = torch.randn(S, device=device) 1928 if call_type == 'function': 1929 torch.median(a) 1930 elif call_type == 'function with indices': 1931 torch.median(a, 0) 1932 elif call_type == 'method': 1933 a.median() 1934 elif call_type == 'method with indices': 1935 a.median(0) 1936 elif call_type == 'out with indices': 1937 result = torch.empty_like(a) 1938 indices = torch.empty((), dtype=torch.long, device=device) 1939 torch.median(a, 0, out=(result, indices)) 1940 else: 1941 self.fail(f"'{call_type}' is not a valid call type") 1942 1943 def test_func_expect_error(call_type, should_error): 1944 self.check_nondeterministic_alert( 1945 lambda: test_func(call_type), 1946 'median CUDA with indices output', 1947 should_error) 1948 1949 is_cuda = torch.device(device).type == 'cuda' 1950 1951 test_func_expect_error('function', False) 1952 test_func_expect_error('function with indices', is_cuda) 1953 test_func_expect_error('method', False) 1954 test_func_expect_error('method with indices', is_cuda) 1955 test_func_expect_error('out with indices', is_cuda) 1956 1957 # FIXME: move to test_scatter_gather_ops 1958 def _test_gather_backward_one_dim(self, device, deterministic: bool = False) -> None: 1959 with DeterministicGuard(deterministic): 1960 m = random.randint(2000, 3000) 1961 elems = random.randint(10 * m, 20 * m) 1962 dim = 0 1963 src = torch.randn(m, device=device, requires_grad=True) 1964 idx = torch.randint(m, (elems,), device=device) 1965 res = torch.gather(src, dim, idx) 1966 weight = torch.rand_like(res, device=device) * 10 ** 6 1967 res.backward(weight) 1968 assert src.grad is not None 1969 grad = src.grad.detach().clone() 1970 1971 if torch.device(device).type == 'cuda': 1972 for _ in range(2): 1973 src.grad.data.zero_() 1974 res = torch.gather(src, dim, idx) 1975 res.backward(weight) 1976 self.assertEqual(src.grad, grad, atol=0, rtol=0) 1977 else: 1978 expected = torch.zeros_like(src, device=device) 1979 for i in range(elems): 1980 expected[idx[i]] += weight[i] 1981 self.assertEqual(grad, expected, atol=0, rtol=0) 1982 1983 # FIXME: move to test_scatter_gather_ops 1984 @onlyNativeDeviceTypes 1985 def test_gather_backward_deterministic_path(self, device) -> None: 1986 self._test_gather_backward_one_dim(device, True) 1987 1988 # FIXME: move to test_scatter_gather_ops 1989 @onlyCPU 1990 def test_gather_backward_one_dim(self, device) -> None: 1991 self._test_gather_backward_one_dim(device, False) 1992 1993 # FIXME: move to test_scatter_gather_ops 1994 @onlyNativeDeviceTypes 1995 def test_scatter_add_one_dim_deterministic(self, device) -> None: 1996 with DeterministicGuard(True): 1997 m = random.randint(20, 30) 1998 elems = random.randint(2000 * m, 3000 * m) 1999 dim = 0 2000 src = torch.randn(elems, device=device) 2001 idx = torch.randint(m, (elems,), device=device) 2002 2003 x = torch.zeros(m, device=device) 2004 res = x.scatter_add(dim, idx, src) 2005 2006 # Checking if scatter_add is deterministic 2007 for i in range(5): 2008 res_next = x.scatter_add(dim, idx, src) 2009 self.assertEqual(res, res_next, atol=0, rtol=0) 2010 res = res_next 2011 2012 expected = torch.zeros(m, device=device) 2013 for i in range(elems): 2014 expected[idx[i]] += src[i] 2015 2016 self.assertEqual(res, expected, atol=1e-4, rtol=1e-5) 2017 2018 # FIXME: move to 
test_scatter_gather_ops 2019 @onlyNativeDeviceTypes 2020 def test_scatter_zero_size_index(self, device) -> None: 2021 null_index = torch.zeros((0, 4), dtype=torch.int64) 2022 null_arr = torch.zeros((0, 4)) 2023 original = torch.arange(4, dtype=torch.float32) 2024 result = original.scatter(0, null_index, null_arr) 2025 self.assertEqual(result, original, atol=0, rtol=0) 2026 2027 @onlyCUDA 2028 @skipIfTorchInductor("FIXME") 2029 def test_sync_warning(self, device): 2030 2031 def _sync_raises_helper(f, level): 2032 with CudaSyncGuard(level): 2033 if level == 1: 2034 with self.assertWarnsRegex(UserWarning, "called a synchronizing "): 2035 f() 2036 elif level == 2: 2037 with self.assertRaisesRegex(RuntimeError, "called a synchronizing "): 2038 f() 2039 2040 def _no_sync_helper(f, level): 2041 with CudaSyncGuard(level): 2042 f() 2043 2044 def _ind_put_fn(x, ind, val): 2045 x[ind] = val 2046 return x 2047 2048 def _ind_get_fn(x, ind): 2049 return x[ind] 2050 2051 def _cond_fn(x): 2052 if x: # taking boolean value of a tensor synchronizes 2053 return x 2054 else: 2055 return 2 * x 2056 2057 # prepare inputs for subsequent ops 2058 size = 4 2059 x = torch.rand(size, device=device) 2060 y = torch.rand((), device=device) 2061 ind = torch.randint(size, (3,), device=device) 2062 ind_cpu = ind.cpu() 2063 repeats = torch.full((1,), 2, device=device) 2064 mask = torch.randint(2, (size,), device=device, dtype=bool) 2065 expect_no_sync = (lambda: _ind_put_fn(x, mask, 1.), 2066 lambda: _ind_put_fn(x, ind, y), 2067 lambda: _ind_get_fn(x, ind), 2068 lambda: torch.nn.functional.one_hot(ind, num_classes=size), 2069 lambda: torch.randperm(20000, device=device), 2070 lambda: torch.repeat_interleave(x, 2, output_size=2 * size), 2071 lambda: torch.repeat_interleave(x, repeats, output_size=2 * size), 2072 lambda: torch.any(y)) 2073 expect_sync = (lambda: _ind_put_fn(x, mask, y), 2074 lambda: _ind_put_fn(x, ind_cpu, y), 2075 lambda: _ind_get_fn(x, mask), 2076 lambda: _ind_get_fn(x, ind_cpu), 2077 lambda: x.nonzero(), 2078 lambda: _cond_fn(y), 2079 lambda: torch.nn.functional.one_hot(ind), 2080 lambda: torch.repeat_interleave(x, repeats)) 2081 for f, level in product(expect_no_sync, (1, 2)): 2082 _no_sync_helper(f, level) 2083 for f, level in product(expect_sync, (1, 2)): 2084 _sync_raises_helper(f, level) 2085 2086 2087 @dtypes(*floating_types_and(torch.half, torch.bfloat16)) 2088 @skipIfMps 2089 def test_log_normal(self, device, dtype): 2090 a = torch.tensor([10], dtype=dtype, device=device).log_normal_() 2091 self.assertEqual(a.dtype, dtype) 2092 self.assertEqual(a.size(), torch.Size([1])) 2093 2094 @dtypes(*all_types_and(torch.half, torch.bfloat16)) 2095 @skipIfMps 2096 def test_geometric(self, device, dtype): 2097 a = torch.tensor([10], dtype=dtype, device=device).geometric_(0.5) 2098 self.assertEqual(a.dtype, dtype) 2099 self.assertEqual(a.size(), torch.Size([1])) 2100 2101 @skipIfMps 2102 def test_repeat_interleave(self, device): 2103 y = torch.tensor([[1, 2], [3, 4]], device=device) 2104 # exercise single argument function signature 2105 temp = y.repeat_interleave(2) 2106 self.assertEqual(torch.Size([8]), temp.size()) 2107 2108 for dtype in [torch.int, torch.long]: 2109 lengths = torch.tensor([1, 2], dtype=dtype, device=device) 2110 output_size = torch.sum(lengths) 2111 a = torch.repeat_interleave( 2112 y, 2113 lengths, 2114 dim=0, 2115 ) 2116 self.assertEqual(a.dtype, y.dtype) 2117 self.assertEqual(a.size(), torch.Size([3, 2])) 2118 2119 a_with_output = torch.repeat_interleave( 2120 y, 2121 lengths, 2122 
dim=0, 2123 output_size=output_size, 2124 ) 2125 self.assertEqual(a_with_output.dtype, y.dtype) 2126 self.assertEqual(a_with_output.size(), torch.Size([3, 2])) 2127 2128 @dtypes(*floating_types()) 2129 @dtypesIfCPU(*floating_types_and(torch.bfloat16, torch.half)) 2130 @dtypesIfCUDA(*floating_types_and(torch.half)) 2131 def test_bernoulli_p(self, device, dtype): 2132 for trivial_p in ([0, 1], [1, 0, 1, 1, 0, 1]): 2133 x = torch.tensor(trivial_p, dtype=dtype, device=device) 2134 self.assertEqual(x.bernoulli().tolist(), trivial_p) 2135 2136 def isBinary(t): 2137 return torch.ne(t, 0).mul_(torch.ne(t, 1)).sum().item() == 0 2138 2139 p = torch.rand(5, 5, dtype=dtype, device=device) 2140 self.assertTrue(isBinary(p.bernoulli())) 2141 2142 p = torch.rand(5, dtype=dtype, device=device).expand(5, 5) 2143 self.assertTrue(isBinary(p.bernoulli())) 2144 2145 p = torch.rand(5, 5, dtype=dtype, device=device) 2146 torch.bernoulli(torch.rand_like(p), out=p) 2147 self.assertTrue(isBinary(p)) 2148 2149 # RngUniform not implemented for Integral type in XLA test 2150 @dtypes(*floating_types()) 2151 @dtypesIfCPU(*all_types_and(torch.bool, torch.half)) 2152 @dtypesIfCUDA(*all_types_and(torch.bool, torch.half)) 2153 def test_bernoulli_self(self, device, dtype): 2154 2155 def isBinary(t): 2156 return torch.ne(t, 0).mul_(torch.ne(t, 1)).sum().item() == 0 2157 2158 t = torch.empty(10, 10, dtype=dtype, device=device) 2159 2160 t.fill_(2) 2161 t.bernoulli_(0.5) 2162 self.assertTrue(isBinary(t)) 2163 2164 for p_dtype in floating_types_and(*[torch.half] if device.startswith('cuda') else []): 2165 p = torch.rand(10, dtype=p_dtype, device=device).expand(10, 10) 2166 t.fill_(2) 2167 t.bernoulli_(p) 2168 self.assertTrue(isBinary(t)) 2169 2170 t.fill_(2) 2171 torch.bernoulli(torch.rand_like(t, dtype=p_dtype), out=t) 2172 self.assertTrue(isBinary(t)) 2173 2174 t.fill_(2) 2175 t.bernoulli_(torch.rand_like(t, dtype=p_dtype)) 2176 self.assertTrue(isBinary(t)) 2177 2178 @slowTest 2179 @dtypes(*floating_types_and(torch.half)) 2180 @dtypesIfCUDA(*floating_types_and(torch.half)) 2181 def test_bernoulli_edge_cases(self, device, dtype): 2182 # Need to draw a lot of samples to cover every random floating point number. 
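        # With p=0 every draw must be 0 and with p=1 every draw must be 1, so
        # any stray value among the 10**8 samples below is a hard failure.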
        a = torch.zeros(10000, 10000, dtype=dtype, device=device)  # probability of drawing "1" is 0
        num_ones = (torch.bernoulli(a) == 1).sum()
        self.assertEqual(num_ones, 0)

        b = torch.ones(10000, 10000, dtype=dtype, device=device)  # probability of drawing "1" is 1
        num_zeros = (torch.bernoulli(b) == 0).sum()
        self.assertEqual(num_zeros, 0)

    @dtypes(*floating_types_and(torch.half, torch.bfloat16))
    @skipIfMps
    def test_exponential(self, device, dtype):
        a = torch.tensor([10], dtype=dtype, device=device).exponential_(0.5)
        self.assertEqual(a.dtype, dtype)
        self.assertEqual(a.size(), torch.Size([1]))

        # Tests extremal behavior
        t = torch.empty((1,), device=device, dtype=dtype).exponential_(float('inf'))
        self.assertTrue(t.item() == 0)

        # Tests that negative lambda fails
        with self.assertRaises(RuntimeError):
            torch.empty((1,), device=device, dtype=dtype).exponential_(-0.5)

    @onlyCUDA
    @dtypes(torch.half, torch.float)
    def test_exponential_no_zero(self, device, dtype):
        # naively, 0 in exponential can be generated with probability 2^-24,
        # so we draw many samples (rather than one) to check that it is never
        # produced
        # don't test CPU, that would be a long test
        x = torch.empty(50000000, device=device, dtype=dtype).exponential_()
        self.assertTrue(x.min() > 0)

    def _generate_correlation_tensors(self, device, dtype):
        yield make_tensor((0, 0), dtype=dtype, device=device)
        yield make_tensor((1, 0), dtype=dtype, device=device)
        yield make_tensor((0, 1), dtype=dtype, device=device)
        yield make_tensor((2,), dtype=dtype, device=device)
        yield make_tensor((2, 1), dtype=dtype, device=device)
        yield make_tensor((2, 2), dtype=dtype, device=device)
        yield make_tensor((2, 3), dtype=dtype, device=device)
        yield make_tensor((5, 10), dtype=dtype, device=device)
        yield make_tensor((5, 10), dtype=dtype, device=device, noncontiguous=True)
        if dtype != torch.int:
            yield torch.tensor([0, -2, nan, 10.2, inf], dtype=dtype, device=device)

    @onlyNativeDeviceTypes
    @dtypes(torch.int, torch.float, torch.cfloat)
    def test_corrcoef(self, device, dtype):
        for x in self._generate_correlation_tensors(device, dtype):
            res = torch.corrcoef(x)
            ref = np.corrcoef(x.cpu().numpy())
            self.assertEqual(res, ref, exact_dtype=False)

    @skipRocmIfTorchInductor
    @dtypes(torch.int, torch.float, torch.cfloat)
    def test_cov(self, device, dtype):
        def check(t, correction=1, fweights=None, aweights=None):
            res = torch.cov(t, correction=correction, fweights=fweights, aweights=aweights)
            t = t.cpu().numpy()
            fweights = fweights.cpu().numpy() if fweights is not None else None
            aweights = aweights.cpu().numpy() if aweights is not None else None
            ref = np.cov(t, ddof=correction, fweights=fweights, aweights=aweights)
            self.assertEqual(res, ref, atol=1e-05, rtol=1e-05, exact_dtype=False)

        for x in self._generate_correlation_tensors(device, dtype):
            check(x)
            num_observations = x.numel() if x.ndim < 2 else x.size(1)
            if num_observations > 0:
                fweights = torch.randint(1, 10, (num_observations,), device=device)
                aweights = make_tensor((num_observations,), dtype=torch.float, device=device, low=1)
                # pass the per-combination fw/aw values so the None cases are
                # actually exercised
                for correction, fw, aw in product([0, 1, 2], [None, fweights], [None, aweights]):
                    check(x, correction, fw, aw)

    @skipIfNoSciPy
@dtypes(*floating_types_and(torch.half, torch.bfloat16)) 2259 def test_uniform_kstest(self, device, dtype): 2260 from scipy import stats 2261 size = 1000 2262 for from_ in [-42, 0, 4.2]: 2263 for to_ in [-4.2, 0, 42]: 2264 if to_ > from_: 2265 t = torch.empty(size, dtype=dtype, device=device).uniform_(from_, to_) 2266 res = stats.kstest(t.cpu().to(torch.double), 'uniform', args=(from_, (to_ - from_))) 2267 self.assertTrue(res.statistic < 0.1) 2268 2269 @skipIfNoSciPy 2270 @dtypes(*floating_types_and(torch.half)) 2271 @dtypesIfCUDA(*floating_types_and(torch.half, torch.bfloat16)) 2272 def test_normal_kstest(self, device, dtype): 2273 from scipy import stats 2274 size = 1000 2275 for mean in [-10, 0, 50]: 2276 for std in [1, 5, 10]: 2277 t = torch.empty(size, dtype=dtype, device=device).normal_(mean=mean, std=std) 2278 res = stats.kstest(t.cpu().to(torch.double), 'norm', args=(mean, std)) 2279 self.assertTrue(res.statistic < 0.1) 2280 2281 @skipIfMps 2282 @skipIfNoSciPy 2283 @skipRocmIfTorchInductor 2284 @dtypes(*floating_types_and(torch.half, torch.bfloat16)) 2285 def test_lognormal_kstest(self, device, dtype): 2286 from scipy import stats 2287 size = 1000 2288 for mean in [-3, 0, 7]: 2289 for std in [1, 5, 7]: 2290 t = torch.empty(size, dtype=dtype, device=device).log_normal_(mean=mean, std=std) 2291 res = stats.kstest(t.cpu().to(torch.double), 'lognorm', args=(std, 0, math.exp(mean))) 2292 if dtype == torch.half: 2293 self.assertTrue(res.statistic < 0.3) 2294 else: 2295 self.assertTrue(res.statistic < 0.1) 2296 2297 @skipIfMps 2298 @skipIfNoSciPy 2299 @dtypes(*floating_types_and(torch.half, torch.bfloat16)) 2300 def test_exponential_kstest(self, device, dtype): 2301 from scipy import stats 2302 size = 1000 2303 for lambd in [0.5, 1.0, 5.0]: 2304 t = torch.empty(size, dtype=dtype, device=device).exponential_(lambd=lambd) 2305 res = stats.kstest(t.cpu().to(torch.double), 'expon', args=(0, 1 / lambd,)) 2306 self.assertTrue(res.statistic < 0.1) 2307 2308 @skipIfMps 2309 @skipIfNoSciPy 2310 @skipRocmIfTorchInductor 2311 @dtypes(*floating_types_and(torch.half, torch.bfloat16)) 2312 def test_cauchy_kstest(self, device, dtype): 2313 from scipy import stats 2314 size = 1000 2315 for median in [-10, 0, 50]: 2316 for sigma in [0.5, 1.0, 10.0]: 2317 t = torch.empty(size, dtype=dtype, device=device).cauchy_(median=median, sigma=sigma) 2318 res = stats.kstest(t.cpu().to(torch.double), 'cauchy', args=(median, sigma)) 2319 self.assertTrue(res.statistic < 0.1) 2320 2321 @slowTest 2322 @onlyCUDA 2323 @dtypes(torch.bfloat16, torch.float32) 2324 def test_cauchy_no_inf(self, device, dtype): 2325 # torch.float16 will have `inf` because of its smaller range. 
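        # (bfloat16 keeps float32's 8-bit exponent, so its representable range
        # is essentially the same and the heavy Cauchy tails still map to
        # finite values.)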
        for _ in range((2**16) * 2):
            x = torch.empty((2**16), dtype=dtype, device=device)
            x.cauchy_()
            self.assertFalse(x.isinf().sum())

    @dtypes(*floating_types_and(torch.half, torch.bfloat16))
    def test_cauchy(self, device, dtype):
        a = torch.tensor([10], dtype=dtype, device=device).cauchy_(0.0, 0.5)
        self.assertEqual(a.dtype, dtype)
        self.assertEqual(a.size(), torch.Size([1]))

        # Tests extremal behavior
        t = torch.empty((1,), device=device, dtype=dtype).cauchy_(float('inf'), 0.5)
        self.assertTrue(t.item() == float('inf'))

        # Tests that a non-positive sigma fails
        with self.assertRaises(RuntimeError):
            torch.empty((1,), device=device, dtype=dtype).cauchy_(0.0, 0.0)

    @skipIfMps
    @skipIfNoSciPy
    @skipRocmIfTorchInductor
    @dtypes(*all_types_and(torch.half, torch.bfloat16))
    def test_geometric_kstest(self, device, dtype):
        from scipy import stats
        size = 1000
        for p in [0.2, 0.5, 0.8]:
            t = torch.empty(size, dtype=dtype, device=device).geometric_(p=p)
            actual = np.histogram(t.cpu().to(torch.double), np.arange(1, 100))[0]
            expected = stats.geom(p).pmf(np.arange(1, 99)) * size
            res = stats.chisquare(actual, expected)
            self.assertEqual(res.pvalue, 1.0, atol=0.1, rtol=0)

    # FIXME: find test suite for pdist and cdist
    def test_pairwise_distance_empty(self, device):
        shape = (2, 0)
        x = torch.randn(shape, device=device)
        y = torch.randn(shape, device=device)

        self.assertEqual(torch.zeros(2, device=device), torch.pairwise_distance(x, y))
        self.assertEqual(torch.zeros((2, 1), device=device), torch.pairwise_distance(x, y, keepdim=True))

        shape = (0, 2)
        x = torch.randn(shape, device=device)
        y = torch.randn(shape, device=device)
        self.assertEqual(torch.zeros(0, device=device), torch.pairwise_distance(x, y))
        self.assertEqual(torch.zeros((0, 1), device=device), torch.pairwise_distance(x, y, keepdim=True))

    def test_pdist_empty(self, device):
        shape = (0, 2)
        x = torch.randn(shape, device=device)
        self.assertEqual(torch.empty(0, device=device), torch.pdist(x))

        shape = (1, 2)
        x = torch.randn(shape, device=device)
        self.assertEqual(torch.empty(0, device=device), torch.pdist(x))

        shape = (3, 0)
        x = torch.randn(shape, device=device)
        self.assertEqual(torch.zeros(3, device=device), torch.pdist(x))

    def test_cdist_empty(self, device):
        x = torch.randn((0, 5), device=device)
        y = torch.randn((4, 5), device=device)
        self.assertEqual(torch.empty(0, 4, device=device), torch.cdist(x, y))

        x = torch.randn((2, 5), device=device)
        y = torch.randn((0, 5), device=device)
        self.assertEqual(torch.empty(2, 0, device=device), torch.cdist(x, y))

        x = torch.randn((2, 0), device=device)
        y = torch.randn((3, 0), device=device)
        self.assertEqual(torch.zeros(2, 3, device=device), torch.cdist(x, y))

        x = torch.randn((2, 0), device=device)
        y = torch.randn((0, 0), device=device)
        self.assertEqual(torch.empty(2, 0, device=device), torch.cdist(x, y))

    def _brute_cdist(self, x, y, p=2):
        # reference implementation: broadcast x as (..., r1, 1, m) against
        # y as (..., 1, r2, m) and take the p-norm over the last dimension
        r1 = x.shape[-2]
        r2 = y.shape[-2]
        if r1 == 0 or r2 == 0:
            return torch.empty(r1, r2, device=x.device)
        return torch.norm(x[..., None, :] - y[..., None, :, :], p=p, dim=-1)

    @skipIfMps
    def test_cdist_norm(self, device):
        for r1 in [3, 4, 5, 6]:
            for m in [2, 3, 4, 10]:
                for r2 in [4, 6, 7, 8]:
                    for p in [0, 1, 2, 3, 1.5, 2.5, float('inf')]:
                        x = torch.randn(r1, m, device=device)
                        y = torch.randn(r2, m, device=device)
                        if p == 2:
                            for cm in ['use_mm_for_euclid_dist', 'donot_use_mm_for_euclid_dist']:
                                actual = torch.cdist(x, y, p=2, compute_mode=cm)
                                expected = self._brute_cdist(x, y, p=2)
                                self.assertEqual(expected, actual, rtol=0, atol=0.02)
                        else:
                            actual = torch.cdist(x, y, p=p)
                            expected = self._brute_cdist(x, y, p=p)
                            self.assertEqual(expected, actual)

    @skipIfMps
    def test_cdist_norm_batch(self, device):
        for r1 in [3, 4, 5, 6]:
            for m in [2, 3, 4, 10]:
                for r2 in [4, 6, 7, 8]:
                    for p in [0, 1, 2, 3, 1.5, 2.5, float('inf')]:
                        x = torch.randn(2, 3, 6, r1, m, device=device)
                        y = torch.randn(2, 3, 6, r2, m, device=device)
                        if p == 2:
                            for cm in ['use_mm_for_euclid_dist', 'donot_use_mm_for_euclid_dist']:
                                actual = torch.cdist(x, y, p=2, compute_mode=cm)
                                expected = self._brute_cdist(x, y, p=2)
                                self.assertEqual(expected, actual, rtol=0, atol=0.02)
                        else:
                            actual = torch.cdist(x, y, p=p)
                            expected = self._brute_cdist(x, y, p=p)
                            self.assertEqual(expected, actual)

    @onlyCUDA
    def test_cdist_cuda_backward(self, device):
        for l1 in [1, 511, 513]:
            for l2 in [1, 511, 513]:
                for p in [0, 1, 2, 3, 1.5, 2.5, float('inf')]:
                    x1 = torch.randn(4, l1, 32, device=device, requires_grad=True)
                    x2 = x1.clone().detach_().requires_grad_()
                    y1 = torch.randn(4, l2, 32, device=device, requires_grad=True)
                    y2 = y1.clone().detach_().requires_grad_()
                    if p == 2:
                        for cm in ['use_mm_for_euclid_dist', 'donot_use_mm_for_euclid_dist']:
                            z1 = torch.cdist(x1, y1, p=2, compute_mode=cm).mean()
                            z2 = self._brute_cdist(x2, y2, p=2).mean()
                            z1.backward()
                            z2.backward()
                            self.assertEqual(x1.grad, x2.grad, rtol=0, atol=0.001)
                            self.assertEqual(y1.grad, y2.grad, rtol=0, atol=0.001)
                    else:
                        z1 = torch.cdist(x1, y1, p=p).mean()
                        z2 = self._brute_cdist(x2, y2, p=p).mean()
                        # run the backward passes before comparing gradients
                        z1.backward()
                        z2.backward()
                        self.assertEqual(x1.grad, x2.grad, rtol=0, atol=0.001)
                        self.assertEqual(y1.grad, y2.grad, rtol=0, atol=0.001)

    @tf32_on_and_off(0.005)
    @bf32_on_and_off(0.005)
    def test_cdist_large(self, device):
        for cm in ['use_mm_for_euclid_dist_if_necessary', 'use_mm_for_euclid_dist', 'donot_use_mm_for_euclid_dist']:
            x = torch.randn(1000, 10, device=device)
            y = torch.randn(1000, 10, device=device)
            actual = torch.cdist(x, y, p=2, compute_mode=cm)
            expected = self._brute_cdist(x, y, p=2)
            self.assertEqual(expected, actual)

    @slowTest
    @tf32_on_and_off(0.01)
    @bf32_on_and_off(0.01)
    def test_cdist_large_batch(self, device):
        for cm in ['use_mm_for_euclid_dist_if_necessary', 'use_mm_for_euclid_dist', 'donot_use_mm_for_euclid_dist']:
            x = torch.randn(4, 3, 1000, 10, device=device)
            y = torch.randn(4, 3, 1000, 10, device=device)
            actual = torch.cdist(x, y, p=2, compute_mode=cm)
            expected = self._brute_cdist(x, y, p=2)
            self.assertEqual(expected, actual)

    @tf32_on_and_off(0.005)
    @bf32_on_and_off(0.005)
    def test_cdist_non_contiguous(self, device):
        for cm in ['use_mm_for_euclid_dist', 'donot_use_mm_for_euclid_dist']:
            x = torch.randn(5, 7, device=device).mT
            y = torch.randn(5, 3, device=device).mT
            actual = torch.cdist(x, y, p=2, compute_mode=cm)
            expected = self._brute_cdist(x, y, p=2)
self.assertFalse(x.is_contiguous()) 2500 self.assertFalse(y.is_contiguous()) 2501 self.assertEqual(expected, actual) 2502 2503 x = torch.randn(7, 5, device=device) 2504 y = torch.randn(5, 3, device=device).t() 2505 actual = torch.cdist(x, y, p=2, compute_mode=cm) 2506 expected = self._brute_cdist(x, y, p=2) 2507 self.assertTrue(x.is_contiguous()) 2508 self.assertFalse(y.is_contiguous()) 2509 self.assertEqual(expected, actual) 2510 2511 x = torch.randn(5, 7, device=device).t() 2512 y = torch.randn(3, 5, device=device) 2513 actual = torch.cdist(x, y, p=2, compute_mode=cm) 2514 expected = self._brute_cdist(x, y, p=2) 2515 self.assertFalse(x.is_contiguous()) 2516 self.assertTrue(y.is_contiguous()) 2517 self.assertEqual(expected, actual) 2518 2519 @tf32_on_and_off(0.005) 2520 @bf32_on_and_off(0.005) 2521 def test_cdist_non_contiguous_batch(self, device): 2522 for cm in ['use_mm_for_euclid_dist', 'donot_use_mm_for_euclid_dist']: 2523 x = torch.randn(4, 3, 2, 5, 7, device=device).mT 2524 y = torch.randn(4, 3, 2, 5, 3, device=device).mT 2525 actual = torch.cdist(x, y, p=2, compute_mode=cm) 2526 expected = self._brute_cdist(x, y, p=2) 2527 self.assertFalse(x.is_contiguous()) 2528 self.assertFalse(y.is_contiguous()) 2529 self.assertEqual(expected, actual) 2530 2531 x = torch.randn(7, 2, 7, 5, device=device) 2532 y = torch.randn(7, 2, 5, 3, device=device).mT 2533 actual = torch.cdist(x, y, p=2, compute_mode=cm) 2534 expected = self._brute_cdist(x, y, p=2) 2535 self.assertTrue(x.is_contiguous()) 2536 self.assertFalse(y.is_contiguous()) 2537 self.assertEqual(expected, actual) 2538 2539 x = torch.randn(4, 5, 7, device=device).mT 2540 y = torch.randn(4, 3, 5, device=device) 2541 actual = torch.cdist(x, y, p=2, compute_mode=cm) 2542 expected = self._brute_cdist(x, y, p=2) 2543 self.assertFalse(x.is_contiguous()) 2544 self.assertTrue(y.is_contiguous()) 2545 self.assertEqual(expected, actual) 2546 2547 # Maybe merge into OpInfo? 
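    # The gradient of the Euclidean distance d(x, y) = ||x - y|| with respect
    # to x is (x - y) / d, which is undefined as d approaches 0; the eps shift
    # in the helper below keeps every pair away from that extremum so the
    # backward pass over a large matrix stays finite.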
    def test_cdist_euclidean_large(self, device):
        def _test_euclidean_large_cdist(sizex, sizey=None):
            if sizey is None:
                sizey = sizex
            x = torch.randn(sizex, device=device, dtype=torch.float)
            y = torch.randn(sizey, device=device, dtype=torch.float)
            eps = 1e-6
            # to avoid extremum
            x = x - (((x - y) < eps).float() * 2 * eps)
            x.requires_grad = True
            y.requires_grad = True
            dist = torch.cdist(x, y, p=2)
            # Do a backward pass to check that it is valid for large
            # matrices
            loss = dist.sum()
            loss.backward()

        _test_euclidean_large_cdist((2000, 5))

    # Ensure that cdist backward with p<1 does not produce NaNs
    @skipIfMps
    def test_cdist_grad_p_lt_1_no_nan(self, device):
        for p in [0.99, 0.7, 0.5, 0.1, 0.01]:
            x = torch.randn(1, 2, device=device)
            y = x.clone().detach() + torch.tensor([[1., 0.]], device=device)
            x.requires_grad = True
            y.requires_grad = True
            result = torch.cdist(x, y, p=p)
            result.backward(torch.ones_like(result))
            self.assertFalse(torch.isnan(x.grad).any())
            self.assertFalse(torch.isnan(y.grad).any())

    def test_cdist_same_inputs(self, device):
        # Test to detect issues in cdist gradient calculation
        # when the distances are 0
        sizex = (1, 27, 32)
        for p in [0, 1, 2, 3, 1.5, 2.5, float('inf')]:
            x = torch.randn(sizex, device=device, dtype=torch.float)
            dist_grad = torch.randn((1, 27, 27), device=device, dtype=torch.float)
            y = x.clone()
            eps = 1e-6
            x.requires_grad = True
            d = torch.cdist(x, y)
            d.backward(dist_grad)
            # Check that the backward pass does not contain invalid
            # values such as nan or inf
            assert torch.isfinite(x.grad).all()

    @skipIfMps
    def test_cumsum(self, device):
        x = torch.rand(100, 100, device=device)
        res1 = torch.cumsum(x, 1)
        res2 = torch.tensor([]).to(device)
        torch.cumsum(x, 1, out=res2)
        self.assertEqual(res1, res2)
        x.cumsum_(1)
        self.assertEqual(res1, x)

        a = torch.tensor([[True, False, True],
                          [False, False, False],
                          [True, True, True]], device=device)
        b = a.byte()
        aRes = torch.cumsum(a, 0)
        bRes = torch.cumsum(b, 0)
        self.assertEqual(aRes, bRes)
        self.assertEqual(aRes, torch.tensor([[1, 0, 1],
                                             [1, 0, 1],
                                             [2, 1, 2]]))

        aRes = torch.cumsum(a, 1)
        bRes = torch.cumsum(b, 1)
        self.assertEqual(aRes, bRes)
        self.assertEqual(aRes, torch.tensor([[1, 1, 2],
                                             [0, 0, 0],
                                             [1, 2, 3]]))

        # Check that cumulative sum over a zero length dimension doesn't crash on backprop.
        # Also check that cumsum over other dimensions in a tensor with a zero-length
        # dimension also works
        # Also include a basic suite of similar tests for other base cases.
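        # For example, cumsum along dim=0 of a (2, 0) tensor is itself (2, 0),
        # and backprop through its (empty) sum should still produce a zero
        # gradient with the original shape.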
        shapes = [[2, 0], [2, 1, 4], [0, 2, 3], [1], [5]]
        for shape in shapes:
            for dim in range(len(shape)):
                raw_tensor = torch.zeros(*shape, requires_grad=True)
                integrated = raw_tensor.cumsum(dim=dim)
                # Check that backward does not crash
                integrated.sum().backward()
                # Check that output maintained correct shape
                self.assertEqual(raw_tensor.shape, raw_tensor.grad.shape)

        # Check a scalar example
        raw_tensor = torch.tensor(3., requires_grad=True)
        integrated = raw_tensor.cumsum(dim=-1)
        self.assertEqual(raw_tensor, integrated)
        # Check that backward does not crash
        integrated.sum().backward()
        # Check that output maintained correct shape
        self.assertEqual(raw_tensor.shape, raw_tensor.grad.shape)

    @skipIfMps
    def test_cumprod(self, device):
        x = torch.rand(100, 100, device=device)
        res1 = torch.cumprod(x, 1)
        res2 = torch.tensor([]).to(device)
        if not TEST_WITH_TORCHINDUCTOR:
            torch.cumprod(x, 1, out=res2)
            self.assertEqual(res1, res2)
        x.cumprod_(1)
        self.assertEqual(res1, x)

        a = torch.tensor([[True, False, True],
                          [False, False, False],
                          [True, True, True]], dtype=torch.bool, device=device)
        b = a.byte()
        aRes = torch.cumprod(a, 0)
        bRes = torch.cumprod(b, 0)
        self.assertEqual(aRes, bRes)
        self.assertEqual(aRes, torch.tensor([[1, 0, 1],
                                             [0, 0, 0],
                                             [0, 0, 0]]))

        aRes = torch.cumprod(a, 1)
        bRes = torch.cumprod(b, 1)
        self.assertEqual(aRes, bRes)
        self.assertEqual(aRes, torch.tensor([[1, 0, 0],
                                             [0, 0, 0],
                                             [1, 1, 1]]))

        # Check that cumulative prod over a zero length dimension doesn't crash on backprop.
        # Also check that cumprod over other dimensions in a tensor with a zero-length
        # dimension also works
        # Also include a basic suite of similar tests for other base cases.
        shapes = [[2, 0], [2, 1, 4], [0, 2, 3], [1], [5]]
        for shape in shapes:
            for dim in range(len(shape)):
                raw_tensor = torch.zeros(*shape, requires_grad=True)
                integrated = raw_tensor.cumprod(dim=dim)
                # Check that backward does not crash
                integrated.sum().backward()
                # Check that output maintained correct shape
                self.assertEqual(raw_tensor.shape, raw_tensor.grad.shape)

        # Check a scalar example
        raw_tensor = torch.tensor(3., requires_grad=True)
        integrated = raw_tensor.cumprod(dim=-1)
        self.assertEqual(raw_tensor, integrated)
        # Check that backward does not crash
        integrated.sum().backward()
        # Check that output maintained correct shape
        self.assertEqual(raw_tensor.shape, raw_tensor.grad.shape)

    @skipIfMps
    def test_cummax_cummin(self, device):
        def test_ops(op, string_of_function_name, expected_output1, expected_output2):
            x = torch.rand(100, 100, device=device)
            out1 = op(x, 1)
            res2 = torch.empty(0, device=device)
            indices2 = torch.empty(0, dtype=torch.int64, device=device)
            op(x, 1, out=(res2, indices2))
            self.assertEqual(out1[0], res2)
            self.assertEqual(out1[1], indices2)

            a = torch.tensor([[True, False, True],
                              [False, False, False],
                              [True, True, True]], dtype=torch.bool, device=device)
            b = a.byte()
            aRes = op(a, 0)
            bRes = op(b, 0)
            self.assertEqual(aRes[0], bRes[0].bool())
            self.assertEqual(aRes[0], expected_output1.bool())

            # test inf and nan input
            x = torch.tensor([4, inf, 1.5, -inf, 0, nan, 1])
            xRes = op(x, 0)[0]
            self.assertEqual(xRes, expected_output2)

            # op shouldn't accept values or indices with a dtype, device type, or
            # layout different from that of the input tensor
            t = torch.randn(10)
            values = torch.empty(0, dtype=torch.int16)
            indices = torch.empty(0, dtype=torch.int64)
            with self.assertRaisesRegex(
                    RuntimeError,
                    'expected scalar_type Float but found Short'):
                op(t, 0, out=(values, indices))

            # Check that op over a zero length dimension doesn't crash on backprop.
            # Also check that op over other dimensions in a tensor with a zero-length
            # dimension also works
            # Also include a basic suite of similar tests for other base cases.
2738 shapes = [[2, 0], [2, 1, 4], [0, 2, 3], [1], [5]] 2739 for shape in shapes: 2740 for dim in range(len(shape)): 2741 raw_tensor = torch.zeros(*shape, requires_grad=True) 2742 integrated = getattr(raw_tensor, string_of_function_name)(dim=dim) 2743 # Check that backward does not crash 2744 integrated[0].sum().backward() 2745 # Check that output maintained correct shape 2746 self.assertEqual(raw_tensor.shape, raw_tensor.grad.shape) 2747 2748 # Check a scalar example 2749 raw_tensor = torch.tensor(3., requires_grad=True) 2750 integrated = getattr(raw_tensor, string_of_function_name)(dim=-1) 2751 # Check that backward does not crash 2752 integrated[0].sum().backward() 2753 # Check that output maintained correct shape 2754 self.assertEqual(raw_tensor.shape, raw_tensor.grad.shape) 2755 2756 expected_out = torch.tensor([4, inf, inf, inf, inf, nan, nan]) 2757 test_ops(torch.cummax, "cummax", torch.tensor([[1, 0, 1], 2758 [1, 0, 1], 2759 [1, 1, 1]]), expected_out) 2760 2761 expected_out = torch.tensor([4, 4, 1.5, -inf, -inf, nan, nan]) 2762 test_ops(torch.cummin, "cummin", torch.tensor([[1, 0, 1], 2763 [0, 0, 0], 2764 [0, 0, 0]]), expected_out) 2765 2766 @skipIfMps 2767 def test_logcumsumexp(self, device): 2768 def logcumsumexp(a, axis): 2769 return torch.cumsum(a.exp(), axis=axis).log_() 2770 2771 axis = -1 2772 a = torch.randn(100, 100, device=device) 2773 2774 actual = a.logcumsumexp(axis) 2775 expected = logcumsumexp(a, axis) 2776 self.assertEqual(a.dtype, actual.dtype) 2777 self.assertEqual(expected.shape, actual.shape) 2778 self.assertEqual(expected, actual) 2779 2780 # check -inf and nan handling 2781 x = torch.tensor([-float('inf'), -float('inf'), 1.0, 1.0, float('inf'), 2782 float('inf'), float('nan'), 1.0, 1.0], device=device) 2783 x2d = x.unsqueeze(0).expand(2, -1) 2784 2785 for inp in (x, x2d): 2786 actual = inp.logcumsumexp(axis) 2787 expected = logcumsumexp(inp, axis) 2788 self.assertEqual(expected, actual) 2789 2790 # Check that out is actually inplace 2791 b = torch.randn(5, 2, device=device) 2792 inplace_out = torch.zeros(5, 2, device=device) 2793 2794 expected = logcumsumexp(b, axis) 2795 torch.logcumsumexp(b, axis=axis, out=inplace_out) 2796 2797 self.assertEqual(inplace_out, expected) 2798 2799 # Check input and inplace_output type mismatch 2800 b = torch.randn(5, 2, device=device, dtype=torch.float64) 2801 inplace_out = torch.zeros(5, 2, device=device, dtype=torch.float32) 2802 with self.assertRaisesRegex( 2803 RuntimeError, 2804 'expected scalar_type Double but found Float'): 2805 torch.logcumsumexp(b, axis, out=inplace_out) 2806 2807 def _test_diff_numpy(self, t, dims=None): 2808 # Helper for test_diff to compare with NumPy reference implementation 2809 def to_np(t): 2810 if t.dtype == torch.bfloat16: 2811 return t.to(dtype=torch.float, device="cpu").numpy() 2812 else: 2813 return t.cpu().numpy() 2814 2815 for dim in dims if dims else range(t.dim()): 2816 prepend = t.narrow(dim, 0, 1) 2817 append = t.narrow(dim, 0, 1) 2818 np_t = to_np(t) 2819 2820 # test when no prepend and append 2821 for n in range(t.size(dim)): 2822 actual = torch.diff(t, dim=dim, n=n) 2823 expected = torch.from_numpy(np.diff(np_t, axis=dim, n=n)) 2824 self.assertEqual(actual, expected.to(t.dtype)) 2825 2826 # test when prepend and append's size along dim is 1 2827 for n in range(1, t.size(dim) + 4): 2828 actual = torch.diff(t, dim=dim, n=n, prepend=prepend, append=append) 2829 expected = torch.from_numpy(np.diff(np_t, axis=dim, n=n, prepend=to_np(prepend), append=to_np(append))) 2830 
                self.assertEqual(actual, expected.to(t.dtype))

            # test when prepend and append's size along dim != 1
            for n in range(1, t.size(dim) * 3):
                actual = torch.diff(t, dim=dim, n=n, prepend=t, append=t)
                expected = torch.from_numpy(np.diff(np_t, axis=dim, n=n, prepend=np_t, append=np_t))
                self.assertEqual(actual, expected.to(t.dtype))

    # All tensors appear contiguous on XLA
    @onlyNativeDeviceTypes
    @dtypes(*all_types_and_complex_and(torch.half, torch.bool))
    def test_diff_noncontig(self, device, dtype):
        shapes = (
            (1,),
            (1, 5),
            (3, 5),
            (1, 5, 1),
            (2, 3, 5))

        for shape in shapes:
            contig = make_tensor(shape, dtype=dtype, device=device, low=-9, high=9)

            non_contig = torch.empty(shape + (2, 2), device=device, dtype=dtype)[..., 0]
            non_contig = non_contig.select(-1, -1)
            non_contig.copy_(contig)
            self.assertTrue(not non_contig.is_contiguous() or shape == (1,))

            self._test_diff_numpy(non_contig)

    # RngNormal not implemented for type f16 for XLA
    @dtypes(*all_types_and_complex_and(torch.bool))
    @dtypesIfCPU(*all_types_and_complex_and(torch.half, torch.bool))
    @dtypesIfCUDA(*all_types_and_complex_and(torch.half, torch.bool))
    def test_diff(self, device, dtype):
        shapes = (
            (1,),
            (1, 5),
            (3, 5),
            (1, 5, 1),
            (2, 3, 5))

        for shape in shapes:
            contig = make_tensor(shape, dtype=dtype, device=device, low=-9, high=9)
            self._test_diff_numpy(contig)

        t = torch.ones(2, 3)

        with self.assertRaisesRegex(
                RuntimeError, 'diff expects prepend or append to be the same dimension as input'):
            invalid_prepend = torch.tensor([1, 2, 3], device=device, dtype=dtype)
            t.diff(dim=0, prepend=invalid_prepend)

        with self.assertRaisesRegex(
                RuntimeError, 'diff expects the shape of tensor to prepend or append to match that of input'):
            invalid_prepend = torch.tensor([[0, 1]], device=device, dtype=dtype)
            t.diff(dim=0, prepend=invalid_prepend)

        with self.assertRaisesRegex(
                RuntimeError, 'diff expects input to be at least one-dimensional'):
            scalar = torch.tensor(2, device=device, dtype=dtype)
            torch.diff(scalar)

    # if the given input arg is not a list, return a single-element list: [arg]
    def _wrap_to_list(self, input_array):
        return input_array if isinstance(input_array, list) else [input_array]

    # Ensures that inf, -inf, and nan values do not cause divergence between NumPy and PyTorch.
    # There are two types of possible divergence:
    # 1. When we compute a/b for real numbers a and b with very small absolute values
    #    (i.e. very near 0.0), the result can be inf, -inf, or nan, and this causes divergence.
    # 2. When we divide complex numbers by zero. For example, for a = torch.tensor(3+5j),
    #    a/0 equals nan + nan*j in PyTorch but inf + inf*j in NumPy.
    def _inf_nan_preprocess(self, actual, expected):
        for i in range(len(expected)):
            expected[i] = np.nan_to_num(expected[i], nan=nan, posinf=nan, neginf=nan)
            # nan_to_num is not defined for complex tensors in PyTorch.
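            # For complex results, the real and imaginary parts are therefore
            # sanitized separately below.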
2906 if actual[i].dtype == torch.complex64 : 2907 actual[i].real = torch.nan_to_num(actual[i].real, nan=nan, posinf=nan, neginf=nan) 2908 actual[i].imag = torch.nan_to_num(actual[i].imag, nan=nan, posinf=nan, neginf=nan) 2909 else: 2910 actual[i] = torch.nan_to_num(actual[i], nan=nan, posinf=nan, neginf=nan) 2911 2912 return actual, expected 2913 2914 @onlyNativeDeviceTypes 2915 @dtypes(torch.long, torch.float32, torch.complex64) 2916 def test_gradient_all(self, device, dtype): 2917 def create_scalar(shape): 2918 return make_tensor((1,), device='cpu', dtype=dtype, low=1.).item() 2919 2920 def create_list(shape): 2921 return make_tensor((len(shape),), device='cpu', dtype=dtype, low=1.).tolist() 2922 2923 def create_coordinate_tensors(shape): 2924 tensor_list = [] 2925 for i in range(len(shape)): 2926 tensor_list.append(make_tensor((shape[i],), device=device, dtype=dtype)) 2927 return tensor_list 2928 2929 def filter_shape(shape, dim): 2930 filtered_shape = [] 2931 for i in range(len(dim)): 2932 filtered_shape.append(shape[dim[i]]) 2933 return filtered_shape 2934 2935 # shape, dims format 2936 test_cases = ( 2937 ((5,), (0,)), 2938 ((4, 4), (0, 1)), 2939 ((3, 3, 3), (-1, 0)), 2940 ((4, 4, 4), (2,)), 2941 ((4, 4, 4), (0, 1)), 2942 ((4, 4, 4, 3), (0, 2, 3)), 2943 ((4, 5, 3, 4, 3), (1, 2)), 2944 ((4, 3, 6, 5, 3), (2, 4)), 2945 ((4, 3, 3, 5, 3), (0, 1, 2, 3, 4)), 2946 ((1, 3, 3), (1, 2)), 2947 ((1, 5), (1,)), 2948 ) 2949 2950 for case, contig, edge_order, space_fn in product(test_cases, [True, False], [1, 2], 2951 (create_scalar, create_list, create_coordinate_tensors)): 2952 shape, dims = case 2953 # filter shape by dims before passing filtered shape to create_* functions 2954 filtered_shape = filter_shape(shape, dims) 2955 2956 spacing = space_fn(filtered_shape) 2957 t = make_tensor(shape, device=device, dtype=dtype, noncontiguous=not contig) 2958 t_np = t.cpu().numpy() 2959 2960 actual = torch.gradient(t, spacing=spacing, dim=dims, edge_order=edge_order) 2961 if space_fn == create_coordinate_tensors and spacing[0].device != 'cpu': 2962 spacing = [space.cpu().detach().numpy() for space in spacing] 2963 expected = np.gradient(t_np, *self._wrap_to_list(spacing), axis=dims, edge_order=edge_order) 2964 actual, expected = self._inf_nan_preprocess(list(actual), self._wrap_to_list(expected)) 2965 self.assertEqual(actual, expected, equal_nan=True, atol=1e-4, rtol=0, exact_dtype=False) 2966 2967 @onlyNativeDeviceTypes 2968 @slowTestIf(TEST_WITH_TORCHINDUCTOR) 2969 @dtypes(torch.long, torch.float32, torch.complex64) 2970 def test_gradient_extreme_cases(self, device, dtype): 2971 # Test behaviour for inf and nan values 2972 actual = torch.gradient(torch.tensor([2, -2, inf, inf, -inf, -inf, inf, 3, -inf, 2, nan, nan, 3, inf, nan])) 2973 expected = np.gradient(np.array([2, -2, inf, inf, -inf, -inf, inf, 3, -inf, 2, nan, nan, 3, inf, nan])) 2974 self.assertEqual(actual, self._wrap_to_list(expected), exact_dtype=False) 2975 2976 # Test behaviour in very big tensors 2977 large_size = 100000 2978 t = make_tensor((large_size,), dtype=dtype, device=device) 2979 t_np = t.cpu().numpy() 2980 coordinates_np = np.random.randn(large_size) 2981 coordinates = [torch.tensor(coordinates_np, device=device)] 2982 actual = torch.gradient(t, spacing=coordinates, dim=0, edge_order=1) 2983 expected = [np.gradient(t_np, coordinates_np, axis=0, edge_order=1)] 2984 self.assertEqual(actual, expected, exact_dtype=False) 2985 2986 actual = torch.gradient(t, spacing=coordinates, dim=0, edge_order=2) 2987 expected = [np.gradient(t_np, 
coordinates_np, axis=0, edge_order=2)]
        self.assertEqual(actual, expected, exact_dtype=False)

    @onlyNativeDeviceTypes
    def test_gradient_type_promotion(self, device):
        inputs = (
            make_tensor((4, 4), device=device, dtype=torch.float32),
            make_tensor((4, 4), device=device, dtype=torch.complex64),
            make_tensor((4, 4), device=device, dtype=torch.int64),
        )

        spacing = (
            make_tensor((1,), device='cpu', dtype=torch.float32).item(),
            make_tensor((1,), device='cpu', dtype=torch.int64).item(),
            make_tensor((1,), device='cpu', dtype=torch.complex64).item(),
            make_tensor((2,), device='cpu', dtype=torch.float32, low=0.1).tolist(),
            make_tensor((2,), device='cpu', dtype=torch.int64, low=1).tolist(),
            make_tensor((2,), device='cpu', dtype=torch.complex64).tolist(),
            [make_tensor((4,), device=device, dtype=torch.float32),
             make_tensor((4,), device=device, dtype=torch.float32)],
            [make_tensor((4,), device=device, dtype=torch.int64),
             make_tensor((4,), device=device, dtype=torch.int64)],
            [make_tensor((4,), device=device, dtype=torch.complex64),
             make_tensor((4,), device=device, dtype=torch.complex64)],
        )

        for input, spacing_or_coord, edge_order in product(inputs, spacing, [1, 2]):
            input_np = input.cpu().numpy()
            actual = torch.gradient(input, spacing=spacing_or_coord, dim=(0, 1), edge_order=edge_order)
            spacing_or_coord_wrapped = self._wrap_to_list(spacing_or_coord)
            spacing_or_coord_np = []
            if torch.is_tensor(spacing_or_coord_wrapped[0]) and torch.device(spacing_or_coord_wrapped[0].device).type != 'cpu':
                for i in range(len(spacing_or_coord_wrapped)):
                    spacing_or_coord_np.append(spacing_or_coord_wrapped[i].detach().clone().cpu().numpy())
            else:
                spacing_or_coord_np = spacing_or_coord_wrapped
            expected = np.gradient(input_np, *spacing_or_coord_np, axis=(0, 1), edge_order=edge_order)
            if actual[0].dtype == torch.complex64 and input.dtype != torch.complex64:
                for i in range(len(actual)):
                    self.assertEqual(actual[i].real, expected[i].real, exact_dtype=False)
                    # NumPy does not promote the result when the spacing is complex but
                    # the input is real: it returns a plain real array, so all the
                    # imaginary parts are expected to be zero.
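                    # expected[i] is a real ndarray at this point, so its .imag
                    # view is identically zero.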
                    self.assertEqual(expected[i].imag, torch.zeros(actual[i].shape), exact_dtype=False)
            else:
                actual, expected = self._inf_nan_preprocess(list(actual), expected)
                self.assertEqual(actual, expected, equal_nan=True, exact_dtype=False)

    @onlyNativeDeviceTypes
    @dtypes(torch.long, torch.float32, torch.complex64)
    def test_gradient_spacing_list_length_error(self, device, dtype):
        t = make_tensor((2, 2), device=device, dtype=dtype)

        spacing = (make_tensor((2,), device=device, dtype=dtype),)
        with self.assertRaisesRegex(RuntimeError, r'expected spacing to be'):
            torch.gradient(t, spacing=spacing)

        spacing = (make_tensor((2,), device=device, dtype=dtype),) * 2
        torch.gradient(t, spacing=spacing)

        spacing = (make_tensor((2,), device=device, dtype=dtype),) * 3
        with self.assertRaisesRegex(RuntimeError, r'expected spacing to be'):
            torch.gradient(t, spacing=spacing)

        spacing = (2,)
        with self.assertRaisesRegex(RuntimeError, r'expected spacing to be'):
            torch.gradient(t, spacing=spacing)

        spacing = (2, 2)
        torch.gradient(t, spacing=spacing)

        spacing = (2, 2, 2)
        with self.assertRaisesRegex(RuntimeError, r'expected spacing to be'):
            torch.gradient(t, spacing=spacing)

    def _test_large_cum_fn_helper(self, x, fn):
        expected = fn(x.cpu().float())
        actual = fn(x).cpu().float()
        # Avoid self.assertEqual to save memory.
        torch.testing.assert_close(expected, actual)

    @unittest.skipIf(IS_FBCODE and IS_REMOTE_GPU, "sandcastle OOM with current tpx gpu/re configuration")
    @unittest.skipIf(IS_JETSON, "psutil issue for largeTensorTest. Too large for Jetson.")
    @onlyCUDA
    @dtypes(torch.half)  # use a small dtype to avoid OOM
    @largeTensorTest('25GB', device='cpu')
    @largeTensorTest('4GB', device='cuda')
    def test_large_cumsum(self, device, dtype):
        # initialization to avoid overflow and half caveats
        x = torch.empty(2**30 + 200, device=device, dtype=dtype)
        x[::3] = -3
        x[1::3] = 2
        x[2::3] = 1
        self._test_large_cum_fn_helper(x, lambda x: torch.cumsum(x, 0))

    @onlyCUDA
    @dtypes(torch.half)  # use a small dtype to avoid OOM
    @largeTensorTest('25GB', device='cpu')
    @largeTensorTest('4GB', device='cuda')
    @unittest.skipIf(IS_JETSON, "psutil issue for largeTensorTest. Too large for Jetson.")
    def test_large_cumprod(self, device, dtype):
        # initialization to avoid overflow and half caveats
        x = torch.empty(2**30 + 200, device=device, dtype=dtype)
        x[::3] = 8
        x[1::3] = .25
        x[2::3] = .5
        self._test_large_cum_fn_helper(x, lambda x: torch.cumprod(x, 0))

    @skipIfTorchDynamo("Torchdynamo fails with unknown reason")
    @skipIfMps
    def test_discontiguous_out_cumsum(self, device):
        x = torch.randn(4, 8, device=device)
        y = torch.empty(4, 16, device=device)[:, ::2]
        out = torch.cumsum(x, 0)
        torch.cumsum(x, 0, out=y)
        self.assertFalse(y.is_contiguous())
        self.assertEqual(out, y, atol=0., rtol=0.)

    def _test_cumminmax_helper(self, x, fn, expected_val, expected_ind):
        val, ind = fn(x, -1)
        self.assertEqual(val, expected_val, atol=0, rtol=0)
        self.assertEqual(ind, expected_ind, atol=0, rtol=0)
        # .t().contiguous().t() yields tensors of the same shape whose memory
        # layout is transposed, i.e. non-contiguous for the 2-D inputs used here
        out_val = torch.empty_like(val).t().contiguous().t()
        out_ind = torch.empty_like(ind).t().contiguous().t()
        fn(x, -1, out=(out_val, out_ind))
        # TODO: Fix this. It reproduces with aot_eager too, and looks like a functionalization bug.
        # (the problematic case seems rare, as we're calling an out= op directly from user code,
        # where the passed-in out tensors are non-contiguous).
        if not TEST_WITH_TORCHINDUCTOR:
            self.assertFalse(out_val.is_contiguous())
            self.assertFalse(out_ind.is_contiguous())
        self.assertEqual(out_val, expected_val, atol=0, rtol=0)
        self.assertEqual(out_ind, expected_ind, atol=0, rtol=0)

    @skipIfMps
    def test_cummax_discontiguous(self, device):
        x = torch.tensor([[0, 1, 2, 3, 2, 1], [4, 5, 6, 5, 6, 7]], device=device, dtype=torch.float).t().contiguous().t()
        expected_val = torch.tensor([[0, 1, 2, 3, 3, 3], [4, 5, 6, 6, 6, 7]], device=device, dtype=torch.float)
        expected_ind = torch.tensor([[0, 1, 2, 3, 3, 3], [0, 1, 2, 2, 4, 5]], device=device, dtype=torch.long)
        self._test_cumminmax_helper(x, torch.cummax, expected_val, expected_ind)

    @skipIfMps
    def test_cummin_discontiguous(self, device):
        x = torch.tensor([[3, 2, 1, 0, 1, 2], [7, 6, 5, 4, 5, 2]], device=device, dtype=torch.float).t().contiguous().t()
        expected_val = torch.tensor([[3, 2, 1, 0, 0, 0], [7, 6, 5, 4, 4, 2]], device=device, dtype=torch.float)
        expected_ind = torch.tensor([[0, 1, 2, 3, 3, 3], [0, 1, 2, 3, 3, 5]], device=device, dtype=torch.long)
        self._test_cumminmax_helper(x, torch.cummin, expected_val, expected_ind)

    def test_bool_tensor_value_change(self, device):
        x = torch.tensor([True, False], dtype=torch.bool, device=device)
        x[0] = False
        x[1] = True
        self.assertEqual(x, torch.tensor([False, True], dtype=torch.bool, device=device))

    # FIXME: move to shape ops test suite
    def test_unfold_all_devices_and_dtypes(self, device):
        for dt in all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16):
            x = torch.empty((0, 1, 3, 0), dtype=dt, device=device)
            self.assertEqual((0, 1, 1, 0, 3), x.unfold(2, 3, 2).shape)

    # FIXME: move to shape ops test suite
    def test_unfold_scalars(self, device):
        x = torch.tensor(0.5, device=device)
        # unfold on a 0-dimensional tensor should always return a 1-dimensional
        # tensor of shape [size] (i.e., the second parameter to unfold)

        self.assertEqual(torch.empty(0, device=device), x.unfold(0, 0, 1))
        self.assertEqual(torch.empty(0, device=device), x.unfold(0, 0, 2))
        self.assertEqual(torch.tensor([0.5], device=device), x.unfold(0, 1, 1))

    # FIXME: move to data movement test suite
    def test_copy_all_dtypes_and_devices(self, device):
        from copy import copy
        for dt in all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16):
            x = torch.tensor([1, 2, 3, 4], dtype=dt, device=device)
            x_clone = x.clone()
            y = copy(x)
            y.fill_(1)
            # copy is a shallow copy, only copies the tensor view,
            # not the data
            self.assertEqual(x, y)

    @onlyCPU
    def test_bfloat16_neg_abs(self, device):
        src = torch.randn(256)
        src[0] = torch.nan
        src[1] = -torch.nan
        src[2] = torch.inf
        src[3] = -torch.inf
        src_bf16 = src.bfloat16()
        self.assertEqual(src.neg().bfloat16(), src_bf16.neg())
        self.assertEqual(src.abs().bfloat16(), src_bf16.abs())
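    # neg and abs only touch the IEEE-754 sign bit, so they commute with the
    # float32 -> bfloat16 rounding above, including for the inf and nan entries.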
    @onlyCPU
    @dtypes(torch.bfloat16, torch.half)
    def test_reduced_type_float_copy(self, device, dtype):
        for shape in [(20, 7), (249, 137), (1029, 917), (1, 7, 19, 17), (3, 77, 1091)]:
            input = torch.randn(shape, dtype=torch.float, device=device)
            out1 = input.to(dtype=dtype)
            self.assertEqual(input, out1, atol=None, rtol=None, exact_dtype=False)
            out2 = out1.to(torch.float)
            self.assertEqual(out2, out1, atol=0, rtol=0, exact_dtype=False)

            # also exercise a non-contiguous slice of the input
            input_s = input[..., ::2, :]
            out1 = input_s.to(dtype=dtype)
            self.assertEqual(input_s, out1, atol=None, rtol=None, exact_dtype=False)
            out2 = out1.to(torch.float)
            self.assertEqual(out2, out1, atol=0, rtol=0, exact_dtype=False)

    # FIXME: move to data movement test suite
    @onlyNativeDeviceTypes
    def test_copy_math_view(self, device):
        for dst_dtype, src_dtype in [
                (torch.float32, torch.float32),
                (torch.float64, torch.float32),
                (torch.int64, torch.int32),
                (torch.complex128, torch.complex64),
        ]:
            src = make_tensor((100,), dtype=src_dtype, device=device)
            dst = torch.empty(100, dtype=dst_dtype, device=device)

            dst.copy_(src)
            self.assertEqual(dst, src, exact_dtype=False)

            dst.copy_(src._neg_view())
            self.assertEqual(dst, src.neg(), exact_dtype=False)

            dst._neg_view().copy_(torch._neg_view(src))
            self.assertEqual(dst, src, exact_dtype=False)

            dst._neg_view().copy_(src)
            self.assertEqual(dst, src.neg(), exact_dtype=False)

            # issue: https://github.com/pytorch/pytorch/issues/106051
            dst._neg_view().copy_(dst)
            self.assertEqual(dst, src, exact_dtype=False)

        for dst_dtype, src_dtype in [
                (torch.complex64, torch.complex64),
                (torch.complex128, torch.complex64),
        ]:
            src = make_tensor((100,), dtype=src_dtype, device=device)
            dst = torch.empty(100, dtype=dst_dtype, device=device)

            dst.conj().copy_(src)
            self.assertEqual(dst, src.conj_physical(), exact_dtype=False)

            dst.conj().copy_(src._neg_view())
            self.assertEqual(dst, src.neg().conj_physical(), exact_dtype=False)

    # FIXME: move to data movement test suite
    @onlyNativeDeviceTypes
    @dtypes(torch.int64, torch.float32, torch.complex64)
    def test_copy_transpose_math_view(self, device, dtype):
        src = make_tensor((100, 100), dtype=dtype, device=device).transpose(0, 1)
        dst = torch.empty((100, 100), dtype=dtype, device=device)

        dst._neg_view().copy_(src)
        self.assertEqual(dst, -src)
        dst._neg_view().copy_(src._neg_view())
        self.assertEqual(dst, src)
        dst.copy_(src._neg_view())
        self.assertEqual(dst, -src)

        if dtype.is_complex:
            dst.conj().copy_(src)
            self.assertEqual(dst, src.conj_physical())
            dst.conj().copy_(src.conj())
            self.assertEqual(dst, src)
            dst.copy_(src.conj())
            self.assertEqual(dst, src.conj_physical())

    def test_clone_all_dtypes_and_devices(self, device):
        for dt in all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16):
            x = torch.tensor((1, 1), dtype=dt, device=device)
            y = x.clone()
            self.assertEqual(x, y)

    def test_clone_zero_stride_dim(self, device):
        # stride zero, size 1 axis, not contiguous
        x = torch.randn(10)
        y = x.as_strided([2, 1, 5], [1, 0, 2])
        self.assertEqual(y, y.clone())
    def test_clone_not_memory_dense(self):
        # github issue: https://github.com/pytorch/pytorch/issues/64176
        x = torch.randn(10, 8).t()[::2, ::2]
        y = x.clone()
        # should retain permutation after densification
        self.assertTrue(y.stride() == (1, 4))

    # FIXME: move to elementwise ternary test suite
    @dtypesIfCUDA(*set(get_all_math_dtypes('cuda')))
    @dtypes(*set(get_all_math_dtypes('cpu')))
    def test_addcmul(self, device, dtype):
        # Returns floating or integral scalar corresponding to dtype
        def _number(floating, integer, dtype):
            if dtype in [torch.half, torch.float, torch.double, torch.bfloat16]:
                return floating
            elif dtype in [torch.cfloat, torch.cdouble]:
                return floating * (1 + 1j)
            else:
                return integer

        def rand_tensor(size, dtype, device):
            if dtype.is_floating_point or dtype.is_complex:
                return torch.rand(size=size, dtype=dtype, device=device)
            if dtype == torch.uint8:
                return torch.randint(1, 5, size=size, dtype=dtype, device=device)
            else:
                return torch.randint(-5, 5, size=size, dtype=dtype, device=device)

        a = rand_tensor((2, 2), dtype=dtype, device=device)
        b = rand_tensor((2, 2), dtype=dtype, device=device)
        c = rand_tensor((2, 2), dtype=dtype, device=device)

        alpha = _number(0.5, 3, dtype)

        actual = torch.addcmul(a, b, c, value=alpha)
        expected = a + alpha * b * c

        self.assertEqual(expected, actual)

        with self.assertWarnsOnceRegex(
                UserWarning, "This overload of addcmul is deprecated"):
            self.assertEqual(actual, torch.addcmul(a, alpha, b, c))

        if self.device_type == 'cuda' and dtype == torch.half:
            a = torch.tensor([60000.0], device=device, dtype=dtype)
            b = torch.tensor([60000.0], device=device, dtype=dtype)
            c = torch.tensor([2.0], device=device, dtype=dtype)
            out = torch.addcmul(a, b, c, value=-1)
            self.assertTrue(not (out.isnan() or out.isinf()))

    # FIXME: move to shape ops test suite
    def test_narrow_empty(self, device):
        x = torch.randn(2, 3, 4, device=device)
        for d in range(x.dim()):
            y = x.narrow(d, x.size(d), 0)
            sz = list(x.size())
            sz[d] = 0
            self.assertEqual(sz, y.size())
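    # narrow(d, start, length) accepts start == x.size(d) when length == 0: the
    # empty slice just past the end is valid, much like lst[n:n] on a Python list.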
    def test_narrow_copy_non_contiguous(self, device):
        # see https://github.com/pytorch/pytorch/issues/91690.
        inp = torch.randn(10, 2, device=device).movedim(-1, 0)
        expected = torch.narrow_copy(inp.contiguous(), 1, 0, 10)
        actual = torch.narrow_copy(inp, 1, 0, 10)
        self.assertEqual(expected, actual)

    # FIXME: move to indexing test suite
    @parametrize("reduce", ['prod', 'amin', 'amax', 'mean'])
    @dtypes(*all_types_and(torch.half, torch.bfloat16))
    def test_index_reduce(self, device, dtype, reduce):
        size = (3, 4, 5)
        index_dtypes = [torch.int, torch.long]
        include_selfs = [True, False]
        amin_init = float('inf') if dtype.is_floating_point else torch.iinfo(dtype).max
        amax_init = -float('inf') if dtype.is_floating_point else torch.iinfo(dtype).min
        reduction_init = {'prod': 1, 'mean': 0, 'amin': amin_init, 'amax': amax_init}

        for dest_noncontig, src_noncontig, index_noncontig in product([True, False], repeat=3):
            for idx_dtype, include_self in product(index_dtypes, include_selfs):
                for dim in range(len(size)):
                    num_src = np.random.randint(10)
                    num_dest = size[dim]
                    dest = make_tensor(size, device=device, dtype=dtype, noncontiguous=dest_noncontig)
                    src_size = size[:dim] + (num_src,) + size[dim + 1:]
                    src = make_tensor(src_size, device=device, dtype=dtype, noncontiguous=src_noncontig)
                    idx = torch.testing.make_tensor(
                        num_src, low=0, high=num_dest, dtype=idx_dtype, device=device, noncontiguous=index_noncontig
                    )
                    expected = dest.clone()
                    dest.index_reduce_(dim, idx, src, reduce, include_self=include_self)
                    # fill rows in idx with reduction inits if include_self=False
                    if not include_self:
                        expected.index_fill_(dim, idx.long(), reduction_init[reduce])
                    expected = expected.transpose(0, dim)
                    src = src.transpose(0, dim)
                    for i in range(num_src):
                        if reduce == 'prod':
                            expected[idx[i]] *= src[i]
                        elif reduce == 'amin':
                            torch.minimum(expected[idx[i]], src[i], out=expected[idx[i]])
                        elif reduce == 'amax':
                            torch.maximum(expected[idx[i]], src[i], out=expected[idx[i]])
                        else:
                            expected[idx[i]] += src[i]
                    if reduce == 'mean':
                        counts = torch.ones_like(expected) if include_self else torch.zeros_like(expected)
                        counts.index_add_(0, idx, torch.ones_like(src))
                        counts.masked_fill_(counts == 0, 1)
                        if dtype.is_floating_point:
                            expected.div_(counts)
                        else:
                            expected.div_(counts, rounding_mode="floor")
                    expected = expected.transpose(0, dim)

                    self.assertEqual(dest, expected)

    # FIXME: move to test indexing
    @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
    def test_index_copy(self, device, dtype):
        # We just test for num_copy <= num_dest, as otherwise there are repeated indices
        # and the behavior is undefined
        num_copy, num_dest = 3, 5

        def make_arg(batch_sizes, n, dim, contig):
            size_arg = batch_sizes[:dim] + (n,) + batch_sizes[dim:]
            return make_tensor(size_arg, dtype=dtype, device=device, low=None, high=None, noncontiguous=not contig)

        def ref_index_copy(tgt, dim, idx, src):
            for i in range(idx.size(0)):
                idx_dest = dim * (slice(None),) + (idx[i],)
                idx_src = dim * (slice(None),) + (i,)
                tgt[idx_dest] = src[idx_src]
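        # The index tuple `dim * (slice(None),) + (idx[i],)` picks position idx[i]
        # along `dim` while keeping all leading dims whole; e.g. for dim=1 it is
        # (slice(None), idx[i]), so the assignment reads tgt[:, idx[i]] = src[:, i].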
        # More thorough testing as in index_add
        for dest_contig, src_contig, index_contig in product([True, False], repeat=3):
            for other_sizes in ((), (4, 5)):
                for dim in range(len(other_sizes)):
                    dest = make_arg(other_sizes, num_dest, dim, dest_contig)
                    src = make_arg(other_sizes, num_copy, dim, src_contig)
                    idx = torch.randperm(num_dest, dtype=torch.int64, device=device)[:num_copy]
                    if not index_contig:
                        idx = torch.repeat_interleave(idx, 2, dim=-1)
                        idx = idx[..., ::2]
                    dest2 = dest.clone()
                    dest.index_copy_(dim, idx, src)
                    ref_index_copy(dest2, dim, idx, src)
                    self.assertEqual(dest, dest2)

    # FIXME: move to test indexing
    # onlyNativeDeviceTypes due to an XLA error:
    # https://github.com/pytorch/pytorch/issues/53256
    @onlyNativeDeviceTypes
    @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
    def test_index_copy_scalars(self, device, dtype):
        # Create the 8 possible combinations of scalar sizes for target / index / source
        scalars = ((make_tensor(size_t, dtype=dtype, device=device, low=None, high=None),
                    make_tensor(size_i, dtype=torch.int64, device=device, low=0, high=1),
                    make_tensor(size_s, dtype=dtype, device=device, low=None, high=None))
                   for size_t, size_i, size_s in product([(), (1,)], repeat=3))
        for target, idx, source in scalars:
            target.index_copy_(0, idx, source)
            self.assertEqual(target.item(), source.item())

    # FIXME: move to test indexing
    @onlyCPU
    def test_errors_index_copy(self, device):
        # We do not test the GPU as the CUDA_ASSERT would break the CUDA context
        idx_dim = 8
        tgt_dim = 5
        batch_dim = 3

        # Too large of an index
        a = torch.randn(batch_dim, tgt_dim, device=device)
        idx = torch.full((idx_dim,), tgt_dim, device=device)
        c = torch.zeros(batch_dim, idx_dim, device=device)
        with self.assertRaises(IndexError):
            a.index_copy_(1, idx, c)

        # Too small (negative indices)
        idx = torch.full((idx_dim,), -1, device=device)
        with self.assertRaises(IndexError):
            a.index_copy_(1, idx, c)

        # Too small (very negative indices) - they should be unsupported even
        # when support for negative indices is implemented for index_copy_
        idx = torch.full((idx_dim,), -tgt_dim - 1, device=device)
        with self.assertRaises(IndexError):
            a.index_copy_(1, idx, c)

    def _prepare_data_for_index_copy_and_add_deterministic(
        self, dim: int, device: torch.device
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        assert dim >= 0 and dim < 3
        a = [5, 4, 3]
        a[dim] = 2000
        x = torch.zeros(a, device=device)
        b = a.copy()
        elems = a[dim] * 20
        b[dim] = elems
        src = torch.rand(b, device=device)
        index = torch.randint(a[dim], (elems,), device=device)
        return (x, index, src)

    # FIXME: move to test indexing
    @onlyNativeDeviceTypes
    def test_index_copy_deterministic(self, device: torch.device) -> None:
        for dim in range(3):
            x, index, src = self._prepare_data_for_index_copy_and_add_deterministic(dim, device)
            with DeterministicGuard(True):
                y0 = torch.index_copy(x, dim, index, src)

            x0 = x.clone().detach()
            index_list = index.tolist()
            for i in range(len(index_list)):
                if dim == 0:
                    x0[index_list[i], :, :] = src[i, :, :]
                elif dim == 1:
                    x0[:, index_list[i], :] = src[:, i, :]
                elif dim == 2:
                    x0[:, :, index_list[i]] = src[:, :, i]

            self.assertEqual(x0, y0, atol=0, rtol=0)
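    # The helper above draws 20x more indices than there are slots along `dim`, so
    # duplicate indices are all but guaranteed and the result depends on the order
    # in which writes land; DeterministicGuard(True) makes that order reproducible.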
    # FIXME: move to test indexing
    @onlyNativeDeviceTypes
    def test_index_add_deterministic(self, device: torch.device) -> None:
        for dim in range(3):
            x, index, src = self._prepare_data_for_index_copy_and_add_deterministic(dim, device)
            alpha = random.random() + 1
            # on CPU it should be deterministic regardless of the deterministic mode
            with DeterministicGuard(True):
                y0 = torch.index_add(x, dim, index, src, alpha=alpha)
                for _ in range(3):
                    y = torch.index_add(x, dim, index, src, alpha=alpha)
                    self.assertEqual(y, y0, atol=0, rtol=0)

            with DeterministicGuard(False):
                for _ in range(3):
                    y_nd = torch.index_add(x, dim, index, src, alpha=alpha)
                    self.assertEqual(y_nd, y0, atol=1e-3, rtol=1e-5)

    # FIXME: find a test suite for the put operator
    @onlyNativeDeviceTypes
    def test_index_put_non_accumulate_deterministic(self, device) -> None:
        with DeterministicGuard(True):
            for _ in range(3):
                m = random.randint(10, 20)
                elems = random.randint(20000, 30000)
                values = torch.rand(elems, device=device)
                indices = torch.randint(m, (elems,), device=device)
                input = torch.rand(m, device=device)
                output = input.index_put((indices,), values, accumulate=False)

                input_list = input.tolist()
                indices_list = indices.tolist()
                values_list = values.tolist()
                for i, v in zip(indices_list, values_list):
                    input_list[i] = v

                self.assertEqual(output, input_list)

    # FIXME: move to test indexing
    @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
    @skipIfMps
    def test_index_fill(self, device, dtype):
        x = torch.tensor([[1, 2], [4, 5]], dtype=dtype, device=device)
        index = torch.tensor([0], device=device)
        x.index_fill_(1, index, 0)
        self.assertEqual(x, torch.tensor([[0, 2], [0, 5]], dtype=dtype, device=device))
        if not x.is_complex() and device != "meta":
            with self.assertRaisesRegex(RuntimeError, r"Scalar"):
                x.index_fill_(1, index, 1 + 1j)
        # Make sure that the result stays 0-dim while applied to
        # a 0-dim input
        x = torch.tensor(1, dtype=dtype, device=device)
        self.assertEqual(0, x.index_fill(0, index, -1).dim())
        self.assertEqual(0, x.index_fill_(0, index, -1).dim())

    # FIXME: move to test indexing
    # The test fails for zero-dimensional tensors on XLA
    @onlyNativeDeviceTypes
    @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
    def test_index_select(self, device, dtype):
        num_src, num_out = 3, 5

        def make_arg(batch_sizes, n, dim, contig):
            size_arg = batch_sizes[:dim] + (n,) + batch_sizes[dim:]
            return make_tensor(size_arg, dtype=dtype, device=device, low=None, high=None, noncontiguous=not contig)

        def ref_index_select(src, dim, idx):
            # numpy does not support bfloat16, so compute the reference in float32
            if dtype == torch.bfloat16:
                src = src.float()
            out = torch.from_numpy(np.take(src.cpu().numpy(), idx.cpu().numpy(), axis=dim))
            if dtype == torch.bfloat16:
                out = out.to(device=device, dtype=dtype)
            return out

        for src_contig, idx_contig in product([True, False], repeat=2):
            for other_sizes in ((), (4, 5)):
                for dim in range(len(other_sizes)):
                    src = make_arg(other_sizes, num_src, dim, src_contig)
                    idx = make_tensor(
                        (num_out,), dtype=torch.int64, device=device, low=0, high=num_src, noncontiguous=not idx_contig
                    )
                    out = torch.index_select(src, dim, idx)
                    out2 = ref_index_select(src, dim, idx)
                    self.assertEqual(out, out2)
        for idx_type in (torch.int32, torch.int64):
            other_sizes = (3, 2)
            dim = 1
            src = make_arg(other_sizes, num_src, dim, True)
            idx = make_tensor((num_out,), dtype=idx_type, device=device, low=0, high=num_src, noncontiguous=False)
            out = torch.index_select(src, dim, idx)
            out2 = ref_index_select(src, dim, idx)
            self.assertEqual(out, out2)

        # Create the 4 possible combinations of scalar sizes for index / source
        scalars = ((make_tensor(size_s, dtype=dtype, device=device),
                    torch.zeros(size_i, dtype=torch.int64, device=device))
                   for size_s, size_i in product([(), (1,)], repeat=2))
        for source, idx in scalars:
            out = source.index_select(0, idx)
            self.assertEqual(out.item(), source.item())

    # FIXME: find a test suite for the take operator
    @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
    def test_take(self, device, dtype):
        idx_size = (4,)

        make_arg = partial(make_tensor, device=device, dtype=dtype)
        make_idx = partial(make_tensor, low=0, device=device, dtype=torch.int64)

        def ref_take(src, idx):
            if dtype == torch.bfloat16:
                src = src.half()
            src = src.cpu().numpy()
            idx = idx.cpu().numpy()
            out = torch.from_numpy(np.take(src, idx)).to(device=device, dtype=dtype)
            return out

        for src_contig, idx_contig, idx_reshape in product([True, False], repeat=3):
            for src_size in ((5,), (4, 5)):
                src = make_arg(src_size, noncontiguous=not src_contig)
                idx = make_idx(idx_size, high=src.numel(), noncontiguous=not idx_contig)
                if idx_reshape:
                    idx = idx.reshape(2, 2)
                out = torch.take(src, idx)
                out2 = ref_take(src, idx)
                self.assertEqual(out, out2)

        # Create the 4 possible combinations of scalar sizes for source / index
        for size_s, size_i in product([(), (1,)], repeat=2):
            source = make_arg(size_s)
            idx = make_idx(size_i, high=1)
            out = source.take(idx)
            self.assertEqual(out.item(), source.item())
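    # torch.take always indexes the flattened input, regardless of its shape;
    # e.g. torch.take(torch.tensor([[4, 5], [6, 7]]), torch.tensor([1, 2]))
    # returns tensor([5, 6]).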
    # FIXME: find a test suite for the put operator
    # The bool instance does not work on GPU. See
    # https://github.com/pytorch/pytorch/issues/54317
    @dtypes(*all_types_and_complex_and(torch.half, torch.bfloat16))
    def test_put(self, device, dtype):
        src_size = (4,)

        make_arg = partial(make_tensor, device=device, dtype=dtype)
        make_idx = partial(make_tensor, low=0, device=device, dtype=torch.int64)

        def ref_put(dst, idx, src, accumulate):
            new_dst = dst.clone(memory_format=torch.contiguous_format).view(-1)
            new_idx = idx.contiguous().view(-1)
            new_src = src.contiguous().view(-1)
            method = new_dst.index_add_ if accumulate else new_dst.index_copy_
            return method(0, new_idx, new_src).view_as(dst)

        for dst_contig, src_contig, idx_contig, idx_reshape, accumulate in product([True, False], repeat=5):
            for dst_size in ((5,), (4, 5)):
                dst = make_arg(dst_size, noncontiguous=not dst_contig)
                src = make_arg(src_size, noncontiguous=not src_contig)

                # If accumulate=True, `put_` should be deterministic regardless of the inputs on CPU
                # On CUDA it may not be, but the test has enough tolerance to account for this
                if accumulate:
                    idx = make_idx(src_size, high=dst.numel())
                else:
                    idx = torch.randperm(dst.numel(), dtype=torch.int64, device=device)[:src_size[0]]
                if not idx_contig:
                    idx = torch.repeat_interleave(idx, 2, dim=-1)[..., ::2]
                if idx_reshape:
                    idx = idx.reshape(2, 2)
                # out-of-place
                out = torch.put(dst, idx, src, accumulate)
                reference = ref_put(dst, idx, src, accumulate)
                self.assertEqual(out, reference)

                # in-place
                dst.put_(idx, src, accumulate)
                self.assertEqual(dst, reference)

        # Create the 8 possible combinations of scalar sizes for target / index / source
        scalars = ((make_arg(size_t),
                    make_idx(size_i, high=1),
                    make_arg(size_s))
                   for size_t, size_i, size_s in product([(), (1,)], repeat=3))
        for (dest, idx, source), accumulate in product(scalars, [True, False]):
            dest_init = dest.clone()
            # out-of-place
            out = torch.put(dest, idx, source, accumulate=accumulate)
            # in-place
            dest1 = dest.clone()
            dest1.put_(idx, source, accumulate=accumulate)
            for d in [out, dest1]:
                if accumulate:
                    self.assertEqual(d.item(), (dest_init + source).item())
                else:
                    self.assertEqual(d.item(), source.item())

        # Empty case
        dest = make_arg((3, 2))
        reference = dest.clone()
        idx = make_idx((0,), high=1)
        source = make_arg((0,))
        for accumulate in [True, False]:
            out = torch.put(dest, idx, source, accumulate=accumulate)
            self.assertEqual(out, reference)
            dest.put_(idx, source, accumulate=accumulate)
            self.assertEqual(dest, reference)
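    # put_ is the scatter counterpart of take: it writes `source` values into the
    # flattened tensor at the given indices, and with accumulate=True repeated
    # indices add up, e.g. torch.zeros(4).put_(torch.tensor([1, 1]),
    # torch.tensor([1., 2.]), accumulate=True) yields tensor([0., 3., 0., 0.]).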
    # FIXME: find a test suite for the put operator
    # The bool instance does not work on GPU. See
    # https://github.com/pytorch/pytorch/issues/54317
    @dtypes(*all_types_and_complex_and(torch.half, torch.bfloat16))
    def test_put_accumulate(self, device, dtype):
        # Test for parallel adds with accumulate == True
        low_precision = dtype == torch.half or dtype == torch.bfloat16
        # Use fewer elements to avoid overflow with low precision
        # Grainsize is 3000 for the for_loop to be parallelized on CPU
        sizes = ((100,),) if low_precision else ((200,), (3002,))
        # Bfloat16 has a particularly bad performance here
        # This operation is nondeterministic on GPU, so we are generous with the rtol
        rtol, atol = (1e-1, 1e-2) if low_precision else (1e-3, 1e-4)

        make_arg = partial(make_tensor, low=-2, high=3, device=device, dtype=dtype)
        # Dump everything into the 0-th position
        make_idx = partial(torch.zeros, device=device, dtype=torch.int64)
        args = ((make_idx(size), make_arg(size)) for size in sizes)

        for idx, source in args:
            orig = make_arg((1,))
            out = orig.put(idx, source, accumulate=True)
            self.assertEqual(out, orig + source.sum(), rtol=rtol, atol=atol)

    # FIXME: find a test suite for the take operator
    @skipIfMps
    def test_take_empty(self, device):
        for input_shape in [(0,), (0, 1, 2, 0), (1, 2, 3)]:
            for indices_shape in [(0,), (0, 1, 2, 0)]:
                input = torch.empty(input_shape, device=device)
                indices = torch.empty(indices_shape, dtype=torch.int64, device=device)
                self.assertEqual(indices, torch.take(input, indices), exact_dtype=False)

    # FIXME: find a test suite for the put operator
    def test_put_empty(self, device):
        for dst_shape in [(0,), (0, 1, 2, 0), (1, 2, 3)]:
            for indices_shape in [(0,), (0, 1, 2, 0)]:
                for accumulate in [False, True]:
                    dst = torch.randn(dst_shape, device=device)
                    indices = torch.empty(indices_shape, dtype=torch.int64, device=device)
                    src = torch.randn(indices_shape, device=device)
                    self.assertEqual(dst, dst.put_(indices, src, accumulate=accumulate))

    # FIXME: port to test_scatter_gather_ops.py
    def scatter_allow_reduce(self, device, dtype, reduceop):
        device_type = torch.device(device).type
        return device_type != 'cuda' or (reduceop == 'multiply' and dtype.is_floating_point)
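        # i.e. on CUDA only reduce='multiply' over floating-point dtypes is
        # exercised; all dtype/reduce combinations are allowed on other devices.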
    @dtypes(*floating_and_complex_types())
    @dtypesIfCPU(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
    @dtypesIfCUDA(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
    def test_scatter_reduce_operations_to_large_input(self, device, dtype):
        index = torch.tensor([[1], [2]], device=device, dtype=torch.long)
        test_data = [
            (torch.zeros(4, 4, device=device, dtype=dtype),
             torch.ones(2, 2, device=device, dtype=dtype),
             torch.tensor([[0, 0, 0, 0],
                           [1, 0, 0, 0],
                           [1, 0, 0, 0],
                           [0, 0, 0, 0]],
                          device=device, dtype=dtype), "add"),
            (torch.tensor([2], device=device, dtype=dtype).repeat(4, 4),
             torch.tensor([6], device=device, dtype=dtype).repeat(2, 2),
             torch.tensor([[2, 2, 2, 2],
                           [12, 2, 2, 2],
                           [12, 2, 2, 2],
                           [2, 2, 2, 2]], device=device, dtype=dtype), "multiply"),
        ]

        for input, src, result, operation in test_data:
            if not self.scatter_allow_reduce(device, dtype, operation):
                continue
            input.scatter_(0, index, src, reduce=operation)
            self.assertEqual(input, result)

    @dtypes(*floating_and_complex_types())
    @dtypesIfCPU(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
    @dtypesIfCUDA(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
    def test_scatter_reduce_scalar(self, device, dtype):
        index = torch.tensor([[1], [2]], device=device, dtype=torch.long)
        test_data = [
            (torch.zeros(4, 4, device=device, dtype=dtype), 1,
             torch.tensor([[0, 0, 0, 0],
                           [1, 0, 0, 0],
                           [1, 0, 0, 0],
                           [0, 0, 0, 0]],
                          device=device, dtype=dtype), "add"),
            (torch.tensor([2], device=device, dtype=dtype).repeat(4, 4), 2,
             torch.tensor([[2, 2, 2, 2],
                           [4, 2, 2, 2],
                           [4, 2, 2, 2],
                           [2, 2, 2, 2]], device=device, dtype=dtype), "multiply"),
        ]

        for input, src, result, operation in test_data:
            if not self.scatter_allow_reduce(device, dtype, operation):
                continue
            input.scatter_(0, index, src, reduce=operation)
            self.assertEqual(input, result)

    # FIXME: port to test_scatter_gather_ops.py
    # TODO: remove this after scatter_add_ is deprecated.
    def test_scatter_add_non_unique_index(self, device):
        height = 2
        width = 65536
        input = torch.ones(height, width, device=device)
        index = torch.zeros(height, width, dtype=torch.long, device=device)
        src = torch.ones(height, width, device=device)
        input.scatter_add_(0, index, src)

        self.assertEqual(input,
                         torch.tensor([[3], [1]], device=device,
                                      dtype=torch.float32).repeat(1, width))

    @dtypes(*floating_and_complex_types())
    @dtypesIfCPU(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
    @dtypesIfCUDA(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
    def test_scatter_reduce_non_unique_index(self, device, dtype):
        height = 2
        width = 2
        index = torch.zeros(height, width, dtype=torch.long, device=device)
        test_data = [
            (torch.ones(height, width, device=device, dtype=dtype),
             torch.ones(height, width, device=device, dtype=dtype),
             torch.tensor([[3], [1]], device=device, dtype=dtype).repeat(1, width), "add"),
            (torch.tensor([2], device=device, dtype=dtype).repeat(height, width),
             torch.tensor([2], device=device, dtype=dtype).repeat(height, width),
             torch.tensor([[8], [2]], device=device,
                          dtype=dtype).repeat(1, width), "multiply"),
        ]

        for input, src, result, operation in test_data:
            if not self.scatter_allow_reduce(device, dtype, operation):
                continue
            input.scatter_(0, index, src, reduce=operation)
            self.assertEqual(input, result, msg=f"result: {result} input: {input} method: {operation}")

    @onlyCUDA
    @dtypes(*complex_types())
    def test_scatter_reduce_multiply_unsupported_dtypes(self, device, dtype):
        height = 2
        width = 2
        index = torch.zeros(height, width, dtype=torch.long, device=device)
        input = torch.ones(height, width, device=device, dtype=dtype)
        src = torch.ones(height, width, device=device, dtype=dtype)
        with self.assertRaises(RuntimeError):
            input.scatter_(0, index, src, reduce="multiply")

    # FIXME: port to test_scatter_gather_ops.py
    def test_scatter_to_large_input(self, device):
        input = torch.zeros(4, 4, device=device)
        src = torch.ones(2, 2, device=device)
        index = torch.tensor([[1], [2]], device=device, dtype=torch.long)
        input.scatter_(0, index, src)
        self.assertEqual(input, torch.tensor([[0, 0, 0, 0],
                                              [1, 0, 0, 0],
                                              [1, 0, 0, 0],
                                              [0, 0, 0, 0]], device=device, dtype=torch.float32))
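    # scatter_ overwrites destination elements (with duplicate indices a single,
    # unspecified source value wins), whereas scatter_add_ below accumulates every
    # matching source element into the destination.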
    # FIXME: port to test_scatter_gather_ops.py
    def test_scatter_add_to_large_input(self, device):
        input = torch.zeros(4, 4, device=device)
        src = torch.ones(2, 2, device=device)
        index = torch.tensor([[1], [2]], device=device, dtype=torch.long)
        input.scatter_add_(0, index, src)
        self.assertEqual(input, torch.tensor([[0, 0, 0, 0],
                                              [1, 0, 0, 0],
                                              [1, 0, 0, 0],
                                              [0, 0, 0, 0]], device=device, dtype=torch.float32))

    # FIXME: port to test_scatter_gather_ops.py
    def test_scatter_bool(self, device):
        x = torch.tensor([[True, True, True], [True, True, True]], device=device)
        res = torch.zeros(3, 3, dtype=torch.bool, device=device)
        res = res.scatter_(0, torch.tensor([[0, 1, 2], [0, 1, 2]], device=device), x)
        self.assertEqual(res, torch.tensor([[True, False, False],
                                            [False, True, False],
                                            [False, False, True]], device=device))

    # FIXME: port to test_scatter_gather_ops.py
    def test_scatter_add_bool(self, device):
        x = torch.tensor([[True, True, True, True, True], [True, True, True, True, True]], device=device)
        res = torch.zeros(3, 5, dtype=torch.bool, device=device)
        res = res.scatter_add_(0, torch.tensor([[0, 1, 2, 0, 0], [2, 0, 0, 1, 2]], device=device), x)
        self.assertEqual(res, torch.tensor([[True, True, True, True, True],
                                            [False, True, False, True, False],
                                            [True, False, True, False, True]], device=device))

    # FIXME: find a test suite for the masked scatter operator
    @onlyNativeDeviceTypes
    @dtypes(*all_types_and_complex_and(torch.half, torch.bfloat16))
    def test_masked_scatter(self, device, dtype):
        dt = dtype
        num_copy, num_dest = 3, 10
        dest = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=dt, device=device)
        dest2 = dest.clone()
        dest_ones = dest.clone()
        dest_ones_expected = dest.clone()
        src = torch.tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=dt, device=device)
        src_ones = torch.tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=dt, device=device)
        mask = torch.tensor((0, 0, 0, 0, 1, 0, 1, 0, 1, 0), dtype=torch.bool, device=device)

        dest.masked_scatter_(mask, src)
        # reference: source elements are consumed in order, one per True mask entry
        j = 0
        for i in range(num_dest):
            if mask[i]:
                dest2[i] = src[j]
                dest_ones_expected[i] = src_ones[j]
                j += 1
        self.assertEqual(dest, dest2, atol=0, rtol=0)

        dest_ones.masked_scatter_(mask, src_ones)
        self.assertEqual(dest_ones, dest_ones_expected, atol=0, rtol=0)

        # Bound checking in CUDA is done inside a kernel in order to avoid
        # synchronization, but this means we cannot clear the failures, so
        # there is no way to test it and then recover.
        if self.device_type != 'cuda':
            # make src smaller; this should fail
            src = torch.zeros(num_copy - 1, dtype=dt, device=device)
            with self.assertRaises(RuntimeError):
                dest.masked_scatter_(mask, src)

        # empty tensor
        dest = torch.empty((5, 0, 5), dtype=dt, device=device)
        mask = torch.ones_like(dest, dtype=torch.bool, device=device)
        src = torch.empty((0,), dtype=dt, device=device)
        dest.masked_scatter_(mask, src)

        dest = torch.empty((5, 0, 5), dtype=dt, device=device)
        mask = torch.ones((5, 1, 5), dtype=torch.bool, device=device)
        src = torch.empty((0,), dtype=dt, device=device)
        dest.masked_scatter_(mask, src)

    # FIXME: find a test suite for the masked scatter operator
    @skipIfMps
    def test_masked_scatter_bool_tensor(self, device):
        src = torch.tensor([True, True, True], device=device)
        dst = torch.tensor([False, False, False], device=device)
        mask = torch.tensor([False, True, False], device=device)

        dst.masked_scatter_(mask, src)
        self.assertEqual(dst, torch.tensor([False, True, False], device=device))

        mask = torch.tensor([True, False, True], device=device)
        dst = dst.masked_scatter(mask, src)
        self.assertEqual(dst, torch.tensor([True, True, True], device=device))

    # FIXME: find a test suite for the masked scatter operator
    # test_scatter_gather_ops or test_masked_ops?
    @onlyCUDA
    @largeTensorTest('30GB')
    def test_masked_scatter_large_tensor(self, device):
        t_cpu = torch.empty(2**31 + 1, dtype=torch.bool).random_()
        t = t_cpu.to(device)
        result_cpu = t_cpu.masked_scatter(t_cpu, t_cpu)
        result = t.masked_scatter(t, t)
        self.assertEqual(result, result_cpu)

    # FIXME: find a test suite for the masked select operator
    @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
    def test_masked_select(self, device, dtype):
        if device == 'cpu':
            warn = 'masked_select received a mask with dtype torch.uint8,'
        else:
            warn = 'indexing with dtype torch.uint8 is now deprecated, pl'
        for maskType in integral_types_and(torch.bool):
            num_src = 10
            src = torch.tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=dtype, device=device)
            mask = torch.randint(2, (num_src,), device=device, dtype=maskType)

            if maskType is not torch.bool:
                with self.assertRaisesRegex(RuntimeError, r'expected BoolTensor for mask'):
                    dst = src.masked_select(mask)
                continue
            dst = src.masked_select(mask)
            dst2 = []
            for i in range(num_src):
                if mask[i]:
                    dst2 += [src[i]]
            self.assertEqual(dst, torch.tensor(dst2), atol=0, rtol=0)

            dst3 = torch.empty(0, device=device, dtype=dtype)
            torch.masked_select(src, mask, out=dst3)
            self.assertEqual(dst3, torch.tensor(dst2, dtype=dst3.dtype), atol=0, rtol=0)

        # Since half on CPU is not supported, need to skip the remaining test cases
        if dtype == torch.half and torch.device(device).type == 'cpu':
            return

        # Ensure that masks are expanded to match tensor properly
        a = torch.rand(100, 100, device=device).mul(100).to(dtype)
        mask_first_el_each_row = torch.zeros(100, device=device, dtype=torch.bool)
        mask_first_el_each_row[0] = True
        a_masked = a.masked_select(mask_first_el_each_row)
        self.assertEqual(a_masked, a[:, 0])

        mask_first_row = torch.zeros(100, 1, device=device, dtype=torch.bool)
        mask_first_row[0][0] = True
        a_masked = a.masked_select(mask_first_row)
        self.assertEqual(a_masked, a[0, :])

        # Ensure that tensor is expanded to match mask properly
        a = torch.rand(100, device=device).mul(100).to(dtype)
        mask_copy_3_times = torch.tensor([[True], [True], [False], [True]], device=device)
        a_masked = a.masked_select(mask_copy_3_times)
        self.assertEqual(a_masked, a.unsqueeze(0).expand(3, 100).flatten())

    # FIXME: find a test suite for the masked select operator
    def test_masked_select_discontiguous(self, device):
        for size in (10, 200):
            vals = torch.rand(size, size, device=device)
            mask = torch.full((size, size), False, dtype=torch.bool, device=device)
            mask[:, ::2] = True
            vals_list = (vals, vals.t())
            mask_list = (mask, mask.t())
            out_dc = torch.empty(size * size, device=device)[::2]
            for v, m in product(vals_list, mask_list):
                if m.is_contiguous():
                    expected = v[:, ::2].clone().reshape((-1, ))
                else:
                    expected = v[::2].clone().reshape((-1, ))
                out = torch.masked_select(v, m)
                self.assertEqual(out, expected, atol=0, rtol=0)
                torch.masked_select(v, m, out=out_dc)
                self.assertEqual(out_dc, expected, atol=0, rtol=0)

    # FIXME: find a test suite for the masked fill operator
    @dtypes(*product(all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16), (torch.uint8, torch.bool)))
    def test_masked_fill(self, device, dtypes):
        dtype = dtypes[0]
        mask_dtype = dtypes[1]

        num_dest = 10
        dst = torch.zeros(num_dest, dtype=dtype)
        mask = torch.randint(2, (num_dest,), dtype=mask_dtype)
        val = random.random()
        dst2 = dst.clone()

        if mask_dtype is not torch.bool:
            with self.assertRaisesRegex(RuntimeError, 'only supports boolean masks'):
                dst.masked_fill_(mask, val)
            return

        dst.masked_fill_(mask, val)
        for i in range(num_dest):
            if mask[i]:
                dst2[i] = val
        self.assertEqual(dst, dst2, atol=0, rtol=0)

        # test non-contiguous case
        dst = ((torch.randn(num_dest, num_dest, num_dest) * 10).to(dtype)).permute((2, 0, 1))
        dst2 = dst.contiguous()
        if dtype.is_complex:
            mask = dst.abs() > 0
        else:
            mask = dst > 0
        self.assertTrue(not dst.is_contiguous())
        self.assertTrue(dst2.is_contiguous())
        dst.masked_fill_(mask.to(mask_dtype), val)
        dst2.masked_fill_(mask.to(mask_dtype), val)
        self.assertEqual(dst, dst2, atol=0, rtol=0)

    # FIXME: find a test suite for the masked fill operator
    def test_masked_fill_bool_tensor(self, device):
        dst = torch.tensor([True, False, True], device=device)
        mask = torch.tensor([False, True, False], device=device)

        dst.masked_fill_(mask, True)
        self.assertEqual(dst, torch.tensor([True, True, True], device=device))

        dst = dst.masked_fill(mask, False)
        self.assertEqual(dst, torch.tensor([True, False, True], device=device))

    def test_tensor_shape_empty(self, device):
        x = torch.randn((0, 1, 3, 0), device=device)
        # flatten
        self.assertEqual((0,), torch.flatten(x, 0, 3).shape)
        self.assertEqual((0, 0), torch.flatten(x, 0, 2).shape)
        self.assertEqual((0, 3, 0), torch.flatten(x, 1, 2).shape)

        # squeeze, unsqueeze
        self.assertEqual((0, 1, 1, 3, 0), torch.unsqueeze(x, 1).shape)
        self.assertEqual((0, 3, 0), torch.squeeze(x, 1).shape)
        self.assertEqual((0, 3, 0), torch.squeeze(x).shape)

        # transpose, t
        self.assertEqual((0, 0, 3, 1), torch.transpose(x, 1, 3).shape)
        y = torch.randn((5, 0), device=device)
        self.assertEqual((0, 5), y.t().shape)

        # select
        self.assertEqual((0, 1, 0), torch.select(x, 2, 2).shape)

        # repeat, permute
        self.assertEqual((9, 0, 5, 6, 0), x.repeat(9, 7, 5, 2, 3).shape)
        self.assertEqual((3, 0, 0, 1), x.permute(2, 3, 0, 1).shape)

        # diagonal, diagflat
        self.assertEqual((0,), torch.diagonal(torch.randn((5, 0), device=device)).shape)
        self.assertEqual((0,), torch.diagonal(torch.randn((0, 5), device=device)).shape)
        # off the end offsets are valid
        self.assertEqual((0,), torch.diagonal(torch.randn((5, 0), device=device), offset=1).shape)
        self.assertEqual((0,), torch.diagonal(torch.randn((0, 5), device=device), offset=1).shape)
        # check non-zero sized offsets off the end
        self.assertEqual((5, 6, 0), torch.diagonal(torch.randn((3, 4, 5, 6), device=device), offset=45252).shape)
        self.assertEqual((5, 6, 0), torch.diagonal(torch.randn((3, 4, 5, 6), device=device), offset=-45252).shape)

        self.assertEqual((0, 0), torch.diagflat(torch.tensor([], device=device)).shape)
        self.assertEqual(torch.zeros(1, 1), torch.diagflat(torch.tensor([], device=device), offset=1))
        self.assertEqual((0, 0), torch.diagflat(torch.tensor([[]], device=device)).shape)
        self.assertEqual(torch.zeros(1, 1), torch.diagflat(torch.tensor([[]], device=device), offset=1))

        # stack, split, chunk
        self.assertEqual((4, 0, 1, 3, 0), torch.stack((x, x, x, x)).shape)
        self.assertEqual([(0, 1, 3, 0)],
                         [z.shape for z in torch.chunk(x, 1, dim=0)])

        self.assertEqual([(0, 1, 3, 0), ] * 3, [z.shape for z in torch.chunk(x, 3, dim=0)])
        self.assertEqual([(0, 1, 1, 0), ] * 3, [z.shape for z in torch.chunk(x, 3, dim=2)])

        # NOTE: split_with_sizes behaves differently than NumPy in that it
        # takes sizes rather than offsets
        self.assertEqual([(0, 1, 0, 0), (0, 1, 1, 0), (0, 1, 2, 0)],
                         [z.shape for z in torch.split(x, (0, 1, 2), dim=2)])

        self.assertRaises(RuntimeError, lambda: torch.split(x, 0, dim=1))
        # This is strange because the split size is larger than the dim size, but consistent with
        # how split handles that case generally (when no 0s are involved).
        self.assertEqual([(0, 1, 3, 0)], [z.shape for z in torch.split(x, 1, dim=0)])
        self.assertEqual([(0, 1, 3, 0)], [z.shape for z in torch.split(x, 0, dim=0)])
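        # split with split_size=0 can only make progress along a zero-sized dim,
        # which is why it raises along dim=1 (size 1) but succeeds along dim=0 (size 0).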
    # functions that operate over a dimension but don't reduce
    def test_dim_function_empty(self, device):
        shape = (0, 1, 2, 0)
        x = torch.randn(shape, device=device)

        # size, stride
        self.assertEqual(0, x.size(3))
        self.assertEqual(2, x.size(2))
        self.assertEqual(2, x.stride(0))
        self.assertEqual(1, x.stride(2))

        self.assertEqual(x, torch.nn.functional.glu(x, 0))
        self.assertEqual((0, 1, 1, 0), torch.nn.functional.glu(x, 2).shape)

        # softmax, logsoftmax
        self.assertEqual(x, torch.nn.functional.softmax(x, 0))
        self.assertEqual(x, torch.nn.functional.softmax(x, 2))
        self.assertEqual(x, torch.nn.functional.softmax(x, 3))

        self.assertEqual(x, torch.nn.functional.log_softmax(x, 0))
        self.assertEqual(x, torch.nn.functional.log_softmax(x, 2))
        self.assertEqual(x, torch.nn.functional.log_softmax(x, 3))

        # cumsum, cumprod, cummax, cummin
        self.assertEqual(shape, torch.cumsum(x, 0).shape)
        self.assertEqual(shape, torch.cumsum(x, 2).shape)
        self.assertEqual(shape, torch.cumprod(x, 0).shape)
        self.assertEqual(shape, torch.cumprod(x, 2).shape)
        self.assertEqual(shape, torch.cummax(x, 0)[0].shape)
        self.assertEqual(shape, torch.cummax(x, 2)[0].shape)
        self.assertEqual(shape, torch.cummin(x, 0)[0].shape)
        self.assertEqual(shape, torch.cummin(x, 2)[0].shape)
        self.assertEqual(shape, torch.logcumsumexp(x, 0).shape)
        self.assertEqual(shape, torch.logcumsumexp(x, 2).shape)

        # flip
        self.assertEqual(x, x.flip(0))
        self.assertEqual(x, x.flip(2))

        # roll
        self.assertEqual(x, x.roll(0, 1).roll(0, -1))
        self.assertEqual(x, x.roll(1, x.size(1)))
        self.assertEqual(x, x.roll(1))
        self.assertEqual(x, x.roll((1, 1), (3, 1)))

        # unbind
        self.assertEqual((), x.unbind(0))
        self.assertEqual((torch.empty((0, 1, 0), device=device), torch.empty((0, 1, 0), device=device)),
                         x.unbind(2))

        # cross
        y = torch.randn((0, 1, 3, 0), device=device)
        self.assertEqual(y.shape, torch.cross(y, y).shape)

        # renorm
        self.assertEqual(shape, torch.renorm(x, 1, 0, 5).shape)
        self.assertEqual(shape, torch.renorm(x, 1, 2, 5).shape)

        # sort
        self.assertEqual([shape, shape], [z.shape for z in torch.sort(x, dim=0)])
        self.assertEqual([shape, shape], [z.shape for z in torch.sort(x, dim=2)])

        # topk
        self.assertEqual([shape, shape], [z.shape for z in torch.topk(x, 0, dim=0)])
        self.assertEqual([(0, 1, 1, 0), (0, 1, 1, 0)], [z.shape for z in torch.topk(x, 1, dim=2)])

        y = torch.randn((2, 3, 4), device=device)
        self.assertEqual([(2, 3, 0), (2, 3, 0)], [z.shape for z in torch.topk(y, 0)])

        # gather
        self.assertEqual(shape, torch.gather(x, 0, torch.empty(shape, dtype=torch.int64, device=device)).shape)
        self.assertEqual(shape, torch.gather(x, 2, torch.empty(shape, dtype=torch.int64, device=device)).shape)
        larger_shape = torch.empty((0, 1, 3, 0), dtype=torch.int64, device=device)
        self.assertEqual(larger_shape.shape, torch.gather(x, 2, larger_shape).shape)
        smaller_shape = torch.empty((0, 1, 0, 0), dtype=torch.int64, device=device)
        self.assertEqual(smaller_shape.shape, torch.gather(x, 2, smaller_shape).shape)
        y = torch.randn((2, 3, 4), device=device)
        self.assertEqual((0, 3, 4),
                         torch.gather(y, 0, torch.empty((0, 3, 4), dtype=torch.int64, device=device)).shape)

        # scatter, scatter_add
        for dim in [0, 2]:
            y = torch.randn(shape, device=device)
            y_src = torch.randn(shape, device=device)
            ind = torch.empty(shape, dtype=torch.int64, device=device)
            self.assertEqual(shape, y.scatter_(dim, ind, y_src).shape)
            self.assertEqual(shape, y.scatter_add_(dim, ind, y_src).shape)

        z = torch.randn((2, 3, 4), device=device)
        z_src = torch.randn((2, 3, 4), device=device)
        self.assertEqual(z, z.scatter_(2, torch.empty((2, 3, 0), dtype=torch.int64, device=device), z_src))
        self.assertEqual(z, z.scatter_add_(2, torch.empty((2, 3, 0), dtype=torch.int64, device=device), z_src))

        # index_fill, index_copy, index_add
        c = x.clone()
        c_clone = c.clone()
        ind_empty = torch.tensor([], dtype=torch.int64, device=device)
        ind_01 = torch.tensor([0, 1], dtype=torch.int64, device=device)
        self.assertEqual(c_clone, c.index_fill_(0, ind_empty, -1))
        self.assertEqual(c_clone, c.index_fill_(2, ind_empty, -1))
        self.assertEqual(c_clone, c.index_fill_(2, ind_01, -1))
        self.assertEqual(c_clone, c.index_copy_(0, ind_empty, torch.empty((0, 1, 2, 0), device=device)))
        self.assertEqual(c_clone, c.index_copy_(2, ind_empty, torch.empty((0, 1, 0, 0), device=device)))
        self.assertEqual(c_clone, c.index_copy_(2, ind_01, torch.empty((0, 1, 2, 0), device=device)))
        self.assertEqual(c_clone, c.index_add_(0, ind_empty, torch.empty((0, 1, 2, 0), device=device)))
        self.assertEqual(c_clone, c.index_add_(2, ind_empty, torch.empty((0, 1, 0, 0), device=device)))
        self.assertEqual(c_clone, c.index_add_(2, ind_01, torch.empty((0, 1, 2, 0), device=device)))

        c = torch.randn((0, 1, 2), device=device)
        c_clone = c.clone()
        self.assertEqual(c_clone, c.index_fill_(0, ind_empty, -1))
        self.assertEqual(c_clone, c.index_copy_(0, ind_empty, torch.empty((0, 1, 2), device=device)))
        self.assertEqual(c_clone, c.index_add_(0, ind_empty, torch.empty((0, 1, 2), device=device)))

        # index fill/copy/add non-empty
        z = torch.randn((2, 3, 4), device=device)
        self.assertEqual(z, z.index_fill_(0, ind_empty, -1))
        z = torch.randn((2, 3, 4), device=device)
        self.assertEqual(z, z.index_copy_(0, ind_empty, torch.empty((0, 3, 4), device=device)))
        z = torch.randn((2, 3, 4), device=device)
        self.assertEqual(z, z.index_add_(0, ind_empty, torch.empty((0, 3, 4), device=device)))

        # index_select
        self.assertEqual(x, x.index_select(0, ind_empty))
        self.assertEqual((0, 1, 0, 0), x.index_select(2, ind_empty).shape)
        self.assertEqual(x, x.index_select(2, ind_01))
        z = torch.randn((2, 3, 4), device=device)  # non-empty
        self.assertEqual((0, 3, 4), z.index_select(0, ind_empty).shape)
        c = torch.randn((0, 1, 2), device=device)
        self.assertEqual(c, c.index_select(0, ind_empty))
        w = torch.randn((0, 3), device=device)
        self.assertEqual((0, 2), w.index_select(1, ind_01).shape)
        w = torch.randn((3, 0), device=device)
        self.assertEqual((2, 0), w.index_select(0, ind_01).shape)
        ind_01_int32 = torch.tensor([0, 1], dtype=torch.int32, device=device)
        self.assertEqual((2, 0), w.index_select(0, ind_01_int32).shape)
        s = torch.randn([], device=device)
        ind_0 = torch.tensor([0], dtype=torch.int32, device=device)
        self.assertEqual([], s.index_select(0, ind_0).shape)
        if device == 'cpu':
            w = torch.randn((0, 3), device=device)
            with self.assertRaisesRegex(RuntimeError, "self indexing axis dim should be positive"):
                torch.index_select(w, 0, ind_01)
            ind_05 = torch.tensor([0, 5], dtype=torch.int64, device=device)
            with self.assertRaisesRegex(RuntimeError, "INDICES element is out of DATA bounds"):
                torch.index_select(w, 1, ind_05)
            with self.assertRaisesRegex(RuntimeError, "Index to scalar can have only 1 value"):
                torch.index_select(s, 0, ind_empty)
            with self.assertRaisesRegex(RuntimeError, "Index to scalar can have only 1 value"):
                torch.ones([]).index_select(0, torch.Tensor([0, 0]).int())

    # FIXME: find a test suite for the pdist operator
    @unittest.skipIf(IS_FBCODE and IS_REMOTE_GPU, "sandcastle OOM with current tpx gpu/re configuration")
    @skipIfRocm
    @onlyCUDA
    @largeTensorTest('32GB', device='cpu')
    @largeTensorTest('5GB', device='cuda')
    def test_pdist_norm_large(self, device):
        # use dim0>=46342 for forward, see:
        # https://github.com/pytorch/pytorch/issues/30583
        # Compare output using GPU with the CPU implementation
        x = torch.randn(50000, 1, dtype=torch.float32)  # 50k * 4 bytes = 200 KB
        # Will require 1249975000 float32s
        expected_cpu = torch.pdist(x, p=2)  # ~1250M * 4 bytes = 5 GB on CPU
        actual_cpu = torch.pdist(x.to(device), p=2).cpu()  # 5 GB on GPU + 5 GB on CPU
        # Workaround for large memory overhead of self.assertTrue (see #84944)
        self.assertTrue(torch.allclose(expected_cpu, actual_cpu))  # ~20GB in allclose

    # FIXME: move to elementwise ternary test suite
    @onlyNativeDeviceTypes
    @dtypesIfCUDA(*set(get_all_math_dtypes('cuda')))
    @dtypes(*set(get_all_math_dtypes('cpu')))
    def test_addcdiv(self, device, dtype):
        # Returns floating or integral scalar corresponding to dtype
        def _number(floating, integer, dtype):
            if dtype in [torch.half, torch.float, torch.double, torch.bfloat16]:
                return floating
            elif dtype in [torch.cfloat, torch.cdouble]:
                return floating * (1 + 1j)
            else:
                return integer

        def non_zero_rand(size, dtype, device):
            if dtype.is_floating_point or dtype.is_complex:
                a = torch.rand(size=size, dtype=dtype, device=device)
            elif dtype == torch.uint8:
                a = torch.randint(1, 5, size=size, dtype=dtype, device=device)
            else:
                a = torch.randint(-5, 5, size=size, dtype=dtype, device=device)
            return a + (a == 0).to(dtype)

        def _test_addcdiv():
            a = non_zero_rand((2, 2), dtype=dtype, device=device)
            b = non_zero_rand((2, 2), dtype=dtype, device=device)
            c = non_zero_rand((2, 2), dtype=dtype, device=device)
            alpha = _number(0.5, 3, dtype)

            expected = a + (alpha * b) / c
            actual = torch.addcdiv(a, b, c, value=alpha)
            self.assertEqual(expected, actual)

            with self.assertWarnsOnceRegex(
                    UserWarning, "This overload of addcdiv is deprecated"):
                self.assertEqual(actual, torch.addcdiv(a, alpha, b, c))

        if not (dtype.is_floating_point or dtype.is_complex):
            # Integer division with addcdiv is prohibited
            with self.assertRaises(RuntimeError):
                _test_addcdiv()
        else:
            _test_addcdiv()

        if self.device_type == 'cuda' and dtype == torch.half:
            a = torch.tensor([60000.0], device=device, dtype=dtype)
            b = torch.tensor([60000.0], device=device, dtype=dtype)
4360 c = torch.tensor([1.0], device=device, dtype=dtype) 4361 out = torch.addcmul(a, b, c, value=-2) 4362 self.assertTrue(not (out.isnan() or out.isinf())) 4363 4364 def test_nullary_op_mem_overlap(self, device): 4365 ops = ( 4366 ("random_", ()), 4367 ("uniform_", ()), 4368 ("cauchy_", ()), 4369 ("log_normal_", ()), 4370 ("exponential_", ()), 4371 ("geometric_", (0.5,)), 4372 ("normal_", ()), 4373 ) 4374 4375 x = torch.rand((1, 3)).expand((3, 3)) 4376 for op, args in ops: 4377 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4378 getattr(x, op)(*args) 4379 4380 # FIXME: move to an elementwise ternary test suite and make this an OpInfo test 4381 # https://github.com/pytorch/pytorch/issues/126474 4382 @xfailIfTorchDynamo 4383 @skipIfTorchInductor("https://github.com/pytorch/pytorch/issues/126474") 4384 @dtypes(torch.double) 4385 def test_ternary_op_mem_overlap(self, device, dtype): 4386 if device == "cpu" and TEST_WITH_TORCHINDUCTOR: 4387 self.skipTest("Failing on cpu") 4388 4389 ops = [ 4390 ("addcmul", True, True, 'cpu'), 4391 ("addcmul", True, True, 'cuda'), 4392 ("addcdiv", True, True, 'cpu'), 4393 ("addcdiv", True, True, 'cuda'), 4394 ("lerp", True, True, 'cpu'), 4395 ("lerp", True, True, 'cuda') 4396 ] 4397 4398 for (fn, has_input_output_mem_overlap_check, 4399 has_internal_mem_overlap_check, dev) in ops: 4400 if dev != device: 4401 continue 4402 out_op = getattr(torch, fn) 4403 inplace_op = getattr(torch.Tensor, fn + '_') 4404 self.check_internal_mem_overlap( 4405 inplace_op, 3, dtype, device, 4406 expected_failure=not has_internal_mem_overlap_check) 4407 self.ternary_check_input_output_mem_overlap(out_op, dev, 4408 expected_failure=not has_input_output_mem_overlap_check) 4409 4410 @expectedFailureMeta # RuntimeError not raised 4411 @dtypes(torch.double) 4412 @onlyNativeDeviceTypes 4413 def test_copy_mem_overlap(self, device, dtype): 4414 self.check_internal_mem_overlap( 4415 torch.Tensor.copy_, num_inputs=2, dtype=dtype, device=device) 4416 sz = 9 4417 doubles = torch.randn(2 * sz, dtype=dtype, device=device) 4418 self.unary_check_input_output_mem_overlap( 4419 doubles, sz, lambda input, out: out.copy_(input)) 4420 4421 # FIXME: convert to ErrorInputs 4422 # (but have to extend ErrorInputs to handle inplace-only errors!) 4423 @onlyNativeDeviceTypes 4424 def test_index_add_mem_overlap(self, device): 4425 x = torch.rand((1,), device=device).expand((6,)) 4426 y = torch.rand((6,), device=device) 4427 ind = torch.tensor([2, 1, 0], device=device) 4428 value = torch.rand((3,), device=device) 4429 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4430 x.index_add_(0, ind, value) 4431 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4432 y.index_add_(0, ind, y[:3]) 4433 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4434 ind.index_add_(0, ind, ind.clone()) 4435 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4436 ind.index_add_(0, ind.clone(), ind) 4437 4438 # FIXME: convert to ErrorInputs 4439 # (but have to extend ErrorInputs to handle inplace-only errors!) 
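    # A minimal sketch of how these mem-overlap tests construct an internally
    # overlapping tensor (illustrative only, not part of the original suite):
    # expand() turns a size-(1,) tensor into a size-(6,) view with stride 0,
    # so all six indices alias a single storage element and any in-place
    # write through the view is ambiguous, hence the "unsupported operation"
    # errors asserted throughout this block.
    #   >>> x = torch.rand((1,)).expand((6,))
    #   >>> x.stride()
    #   (0,)
    #   >>> x.fill_(1.0)  # raises RuntimeError: unsupported operation ...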
4440 @onlyNativeDeviceTypes 4441 def test_index_copy_mem_overlap(self, device): 4442 x = torch.rand((1,), device=device).expand((6,)) 4443 y = torch.rand((6,), device=device) 4444 ind = torch.tensor([2, 1, 0], device=device) 4445 value = torch.rand((3,), device=device) 4446 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4447 x.index_copy_(0, ind, value) 4448 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4449 y.index_copy_(0, ind, y[:3]) 4450 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4451 ind.index_copy_(0, ind, ind.clone()) 4452 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4453 ind.index_copy_(0, ind.clone(), ind) 4454 4455 # FIXME: convert to ErrorInputs 4456 # (but have to extend ErrorInputs to handle inplace-only errors!) 4457 @expectedFailureMeta # Warning not triggered 4458 @onlyNativeDeviceTypes 4459 def test_index_fill_mem_overlap(self, device): 4460 x = torch.rand((1,), device=device).expand((6,)) 4461 y = torch.rand((6,), device=device) 4462 ind = torch.tensor([2, 1, 0], device=device) 4463 value = torch.rand((3,), device=device) 4464 4465 with self.assertWarnsRegex(UserWarning, "index_fill_ on expanded tensors"): 4466 x.index_fill_(0, ind, 1.0) 4467 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4468 ind.index_fill_(0, ind, 0) 4469 4470 # FIXME: convert to ErrorInputs 4471 @expectedFailureMeta # RuntimeError not raised 4472 @onlyNativeDeviceTypes 4473 def test_shift_mem_overlap(self, device): 4474 x = torch.rand(3, device=device) 4475 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4476 x[:-1] <<= x[1:] 4477 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4478 x[:-1] >>= x[1:] 4479 4480 # FIXME: convert to ErrorInputs 4481 # (but have to extend ErrorInputs to handle inplace-only errors) 4482 @expectedFailureMeta # RuntimeError not raised 4483 @onlyNativeDeviceTypes 4484 def test_bernoulli_mem_overlap(self, device): 4485 x = torch.rand((1,), device=device).expand((6,)) 4486 4487 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4488 x.bernoulli_() 4489 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4490 x.bernoulli_(p=0.1) 4491 p = torch.rand(6, device=device) 4492 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4493 x.bernoulli_(p=p) 4494 4495 # FIXME: convert to ErrorInputs 4496 # (but have to extend ErrorInputs to handle inplace-only errors!) 4497 @expectedFailureMeta # RuntimeError not raised 4498 @onlyNativeDeviceTypes 4499 def test_put_mem_overlap(self, device): 4500 x = torch.rand((1,), device=device).expand((6,)) 4501 y = torch.rand((6,), device=device) 4502 ind = torch.tensor([2, 1, 0], device=device) 4503 value = torch.rand((3,), device=device) 4504 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4505 x.put_(ind, value) 4506 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4507 y.put_(ind[0], y[0]) 4508 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4509 ind.put_(ind, ind) 4510 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4511 y.put_(ind, y[:3]) 4512 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4513 ind.put_(ind, ind.clone()) 4514 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4515 ind.put_(ind.clone(), ind) 4516 4517 # FIXME: convert to ErrorInputs 4518 # (but have to extend ErrorInputs to handle inplace-only errors!) 
4519 @expectedFailureMeta # UserWarning not triggered 4520 @onlyNativeDeviceTypes 4521 def test_index_put_mem_overlap(self, device): 4522 x = torch.rand((1,), device=device).expand((6,)) 4523 y = torch.rand((6,), device=device) 4524 ind = torch.tensor([2, 1, 0], device=device) 4525 value = torch.rand((3,), device=device) 4526 with self.assertWarnsRegex(UserWarning, 'expanded tensors'): 4527 x.index_put_((ind,), value) 4528 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4529 y.index_put_((ind,), y[0]) 4530 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4531 ind.index_put_((ind,), ind) 4532 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4533 y.index_put_((ind,), y[:3]) 4534 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4535 ind.index_put_((ind,), ind.clone()) 4536 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4537 ind.index_put_((ind.clone(),), ind) 4538 4539 # FIXME: convert to ErrorInputs 4540 # (but have to extend ErrorInputs to handle inplace-only errors!) 4541 @expectedFailureMeta # UserWarning not triggered 4542 @onlyNativeDeviceTypes 4543 def test_masked_fill_mem_overlap(self, device): 4544 x = torch.rand((1,), device=device).expand((6,)) 4545 mask = torch.tensor([True, False, True, True, False, False], device=device) 4546 with self.assertWarnsRegex(UserWarning, 'expanded tensors'): 4547 x.masked_fill_(mask, 0.) 4548 4549 fill_val = torch.tensor(0., device=device) 4550 with self.assertWarnsRegex(UserWarning, 'expanded tensors'): 4551 x.masked_fill_(mask, fill_val) 4552 4553 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4554 mask[1:].masked_fill_(mask[:-1], False) 4555 4556 # FIXME: convert to ErrorInputs 4557 # (but have to extend ErrorInputs to handle inplace-only errors!) 4558 @expectedFailureMeta # RuntimeError not raised 4559 @onlyNativeDeviceTypes 4560 def test_masked_scatter_mem_overlap(self, device): 4561 x = torch.rand((1,), device=device).expand((6,)) 4562 src = torch.rand((3,), device=device) 4563 mask = torch.tensor([True, False, True, True, False, False], device=device) 4564 4565 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4566 x.masked_scatter_(mask, src) 4567 4568 # FIXME: convert to ErrorInputs 4569 # (but have to extend ErrorInputs to handle inplace-only errors!) 
4570 @onlyNativeDeviceTypes 4571 def test_scatter_mem_overlap(self, device): 4572 x = torch.rand((1,), device=device).expand((6,)) 4573 src = torch.rand((3,), device=device) 4574 ind = torch.tensor([2, 1, 0], device=device, dtype=torch.int64) 4575 4576 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4577 x.scatter_(0, ind, src) 4578 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4579 src.scatter_(0, ind, src) 4580 with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): 4581 ind.scatter_(0, ind, ind.clone()) 4582 4583 # FIXME: move to test distributions 4584 @onlyCUDA 4585 def test_multinomial_device_constrain(self, device): 4586 x = torch.empty(3, device="cpu") 4587 y = torch.empty(3, device=device) 4588 self.assertRaisesRegex( 4589 RuntimeError, "Expected all tensors to be on the same device", 4590 lambda: torch.multinomial(x, 2, out=y)) 4591 4592 # FIXME: move to test distributions 4593 @deviceCountAtLeast(2) 4594 @onlyCUDA 4595 @skipIfTorchInductor("FIXME: error not thrown") 4596 def test_multinomial_gpu_device_constrain(self, devices): 4597 x = torch.empty(3, device=devices[0]) 4598 y = torch.empty(3, device=devices[1], dtype=torch.long) 4599 self.assertRaisesRegex( 4600 RuntimeError, "Expected all tensors to be on the same device", 4601 lambda: torch.multinomial(x, 2, out=y)) 4602 4603 # FIXME: convert this to an automated OpInfo test 4604 @deviceCountAtLeast(2) 4605 @onlyCUDA 4606 def test_device_guard(self, devices): 4607 # verify that all operators with `device_guard: False` behave properly with multiple devices. 4608 # TODO: if we had operator introspection we could figure out this set of operators automatically... 4609 x = torch.randn((1, 2, 3), device=devices[1]) 4610 y = torch.zeros((1, 3, 2), device=devices[1]) 4611 scalar = torch.tensor(5, device=devices[1]) 4612 4613 # property ops 4614 torch.cudnn_is_acceptable(x) 4615 x.is_distributed() 4616 x.is_floating_point() 4617 x.is_complex() 4618 x.is_same_size(y) 4619 x.is_signed() 4620 x.size(0) 4621 x.stride(0) 4622 x.numel() 4623 x.is_set_to(y) 4624 x.data_ptr() 4625 scalar.is_nonzero() 4626 4627 # sparse property ops 4628 y[0][1] = 5 4629 y_sparse = y.to_sparse() 4630 y_sparse.sparse_dim() 4631 y_sparse._dimI() 4632 y_sparse.dense_dim() 4633 y_sparse._dimV() 4634 y_sparse._nnz() 4635 y_sparse.is_coalesced() 4636 y_sparse._indices() 4637 y_sparse._values() 4638 y_sparse.indices() 4639 y_sparse.values() 4640 4641 # in-place ops 4642 def inplace(): 4643 return torch.randn((1, 2, 3), device=devices[1]) 4644 inplace().as_strided_(y.size(), y.stride()) 4645 inplace().resize_(y.size()) 4646 inplace().squeeze_() 4647 inplace().squeeze_(0) 4648 inplace().unsqueeze_(2) 4649 inplace().transpose_(1, 2) 4650 inplace().squeeze_().t_() 4651 inplace().set_(x.storage()) 4652 inplace().set_(x.storage(), x.storage_offset(), x.size(), x.stride()) 4653 inplace().set_(x) 4654 inplace().set_() 4655 y_sparse._coalesced_(True) 4656 4657 # shape modification 4658 x.as_strided(y.size(), y.stride()) 4659 x.expand((5, 2, 3)) 4660 x.expand_as(x) 4661 x.sum_to_size((1,)) 4662 torch.broadcast_tensors(x , x) 4663 x.reshape((1, 3, 2)) 4664 x.reshape_as(y) 4665 x.squeeze() 4666 x.squeeze(0) 4667 x.squeeze().t() 4668 x.transpose(1, 2) 4669 x.unsqueeze(2) 4670 x.view((1, 3, 2)) 4671 x.view_as(y) 4672 4673 # chunk, split, etc. 
        x.chunk(2, dim=1)
        x.split(1, dim=2)
        x.split_with_sizes([1, 2], dim=2)
        x.unfold(dimension=2, size=1, step=1)

        x.narrow(1, 1, 1)
        x.select(1, 1)
        torch.isnan(x)

        torch.empty((1, 3, 2), out=y)
        torch.empty_like(x)
        torch.empty_like(x, dtype=torch.int64)

        # to
        x.to(x)
        x.to(y)
        x.to(x, copy=True)

    def test_is_signed(self, device):
        self.assertEqual(torch.IntTensor(5).to(device).is_signed(), True)
        self.assertEqual(torch.ByteTensor(5).to(device).is_signed(), False)
        self.assertEqual(torch.CharTensor(5).to(device).is_signed(), True)
        self.assertEqual(torch.FloatTensor(5).to(device).is_signed(), True)
        self.assertEqual(torch.HalfTensor(10).to(device).is_signed(), True)

    def test_tensor_type(self):
        for t in torch._tensor_classes:
            if 'cuda' in t.__module__:
                self.assertEqual(t.is_cuda, True)
            else:
                self.assertEqual(t.is_cuda, False)
            if 'xpu' in t.__module__:
                self.assertEqual(t.is_xpu, True)
            else:
                self.assertEqual(t.is_xpu, False)

    # Note - reports a leak of 512 bytes on CUDA device 1
    @deviceCountAtLeast(2)
    @skipCUDAMemoryLeakCheckIf(True)
    @onlyCUDA
    def test_tensor_set_errors_multigpu(self, devices):
        f_cuda0 = torch.randn((2, 3), dtype=torch.float32, device=devices[0])
        f_cuda1 = torch.randn((2, 3), dtype=torch.float32, device=devices[1])

        self.assertRaises(RuntimeError, lambda: f_cuda0.set_(f_cuda1.storage()))
        self.assertRaises(RuntimeError,
                          lambda: f_cuda0.set_(f_cuda1.storage(), 0, f_cuda1.size(), f_cuda1.stride()))
        self.assertRaises(RuntimeError, lambda: f_cuda0.set_(f_cuda1))

    # FIXME: move to test_serialization
    @onlyCUDA
    @deviceCountAtLeast(1)  # Note: test works with one device but prefers more
    def test_serialization(self, devices):
        def _test_serialization(filecontext_lambda):
            t0 = torch.cuda.FloatTensor(5).fill_(1)
            with torch.cuda.device(devices[-1]):
                tn = torch.cuda.FloatTensor(3).fill_(2)
            torch.cuda.set_device(devices[0])
            b = (t0, tn)
            with filecontext_lambda() as f:
                torch.save(b, f)
                f.seek(0)
                c = torch.load(f)
            self.assertEqual(b, c, atol=0, rtol=0)
            u0, un = c
            self.assertEqual(str(u0.device), devices[0])
            self.assertEqual(str(un.device), devices[-1])

        _test_serialization(tempfile.NamedTemporaryFile)
        _test_serialization(BytesIOContext)

    # FIXME: move memory format tests to their own test class/suite
    def test_memory_format_preserved_after_permute(self, device):
        x = torch.randn(4, 3, 8, 8, device=device)
        nhwc = x.contiguous(memory_format=torch.channels_last)
        y = nhwc.permute(0, 1, 3, 2).permute(0, 1, 3, 2)
        self.assertTrue(y.is_contiguous(memory_format=torch.channels_last))

        x = torch.randn(4, 3, 8, 8, 8, device=device)
        ndhwc = x.contiguous(memory_format=torch.channels_last_3d)
        y = ndhwc.permute(0, 1, 4, 3, 2).permute(0, 1, 4, 3, 2)
        self.assertTrue(y.is_contiguous(memory_format=torch.channels_last_3d))

    def test_memory_format_propagation_rules(self, device):

        contiguous = torch.rand(10, 3, 5, 5, device=device)
        cl = torch.rand(10, 3, 5, 5, device=device).contiguous(memory_format=torch.channels_last)
        ambiguous = torch.rand(10, 3, 1, 1, device=device).contiguous(memory_format=torch.channels_last)
        self.assertTrue(ambiguous.is_contiguous(memory_format=torch.channels_last))
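        # Note: with H == W == 1, the channels-last strides and the contiguous
        # strides describe the same memory layout (strides over size-1
        # dimensions are irrelevant), which is why `ambiguous` passes the
        # contiguity checks on either side of this comment for both formats.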
self.assertTrue(ambiguous.is_contiguous(memory_format=torch.contiguous_format)) 4764 bias = torch.rand(1, 1, 1, 1, device=device).contiguous(memory_format=torch.channels_last) 4765 4766 def _test_propagation_rules(self, contiguous, cl, ambiguous, bias): 4767 options = ((ambiguous, contiguous, torch.contiguous_format), 4768 (ambiguous, cl, torch.channels_last), 4769 (contiguous, ambiguous, torch.contiguous_format), 4770 (contiguous, cl, torch.contiguous_format), 4771 (cl, ambiguous, torch.channels_last), 4772 (cl, contiguous, torch.channels_last), 4773 (bias, cl, torch.channels_last), 4774 (cl, bias, torch.channels_last),) 4775 4776 for a, b, mf in options: 4777 result = a + b 4778 self.assertTrue(result.is_contiguous(memory_format=mf)) 4779 4780 _test_propagation_rules(self, contiguous, cl, ambiguous, bias) 4781 4782 cl = cl.to(memory_format=torch.channels_last) 4783 ambiguous = ambiguous.to(memory_format=torch.channels_last) 4784 bias = bias.to(memory_format=torch.channels_last) 4785 4786 _test_propagation_rules(self, contiguous, cl, ambiguous, bias) 4787 4788 # test cases when strides matter in ambiguous tensors 4789 for mf in (torch.channels_last, torch.contiguous_format): 4790 ambiguous = torch.rand(10, 3, 1, 1, device=device).to(memory_format=mf) 4791 bias = torch.rand(3, 1, 1, device=device) 4792 result = ambiguous + bias 4793 self.assertEqual(ambiguous.stride(), result.stride()) 4794 result = bias + ambiguous 4795 self.assertEqual(ambiguous.stride(), result.stride()) 4796 result = ambiguous * 5 4797 self.assertEqual(ambiguous.stride(), result.stride()) 4798 4799 @skipIfMps 4800 def test_memory_format_empty_like(self, device): 4801 def test_helper(x, memory_format): 4802 xc = x.contiguous(memory_format=memory_format) 4803 4804 like = torch.empty_like(xc, memory_format=torch.preserve_format) 4805 self.assertFalse(like.is_contiguous()) 4806 self.assertTrue(like.is_contiguous(memory_format=memory_format)) 4807 4808 like_x = torch.empty_like(x, memory_format=torch.preserve_format) 4809 self.assertTrue(like_x.is_contiguous()) 4810 self.assertFalse(like_x.is_contiguous(memory_format=memory_format)) 4811 4812 like = torch.empty_like(x, memory_format=memory_format) 4813 self.assertFalse(like.is_contiguous()) 4814 self.assertTrue(like.is_contiguous(memory_format=memory_format)) 4815 4816 like = torch.empty_like(xc, memory_format=torch.contiguous_format) 4817 self.assertTrue(like.is_contiguous()) 4818 self.assertFalse(like.is_contiguous(memory_format=memory_format)) 4819 4820 like = torch.empty_like(xc) 4821 self.assertFalse(like.is_contiguous()) 4822 self.assertTrue(like.is_contiguous(memory_format=memory_format)) 4823 4824 sparse = x.to_sparse() 4825 with self.assertRaises(RuntimeError): 4826 z = torch.empty_like(sparse, memory_format=torch.preserve_format) 4827 4828 test_helper(torch.randn(4, 3, 8, 8, device=device), torch.channels_last) 4829 test_helper(torch.randn(4, 3, 8, 8, 8, device=device), torch.channels_last_3d) 4830 4831 def test_memory_format_consistency(self, device): 4832 x = torch.randn(10, 3, 1, 1, device=device) 4833 x_rep = x.as_strided(x.size(), x.stride()) 4834 self.assertEqual(x.size(), x_rep.size()) 4835 self.assertEqual(x.stride(), x_rep.stride()) 4836 self.assertEqual(x.is_contiguous(), x_rep.is_contiguous()) 4837 self.assertEqual(x.is_contiguous(memory_format=torch.channels_last), x_rep.is_contiguous(memory_format=torch.channels_last)) 4838 self.assertEqual( 4839 x.is_contiguous(memory_format=torch.channels_last_3d), 
x_rep.is_contiguous(memory_format=torch.channels_last_3d)) 4840 4841 # FIXME: make this a elementwise unary and elementwise binary OpInfo test 4842 def test_memory_format_operators(self, device): 4843 def _chunk_op(x, y): 4844 x1, x2 = x.chunk(2, dim=1) 4845 return x1 + x2 4846 4847 def _unsqueeze_op_add(x, y): 4848 return x[0].unsqueeze(0) + 3 4849 4850 def _unsqueeze_op_clone(x, y): 4851 return x[0].unsqueeze(0).clone() 4852 4853 def _test_helper(x, y, bias, memory_format): 4854 return_contig_fns = [ 4855 lambda x, y: y + x, 4856 lambda x, y: y * x, 4857 lambda x, y: y.addcdiv(x, y, value=2), 4858 lambda x, y: y.addcmul(x, y, value=2), 4859 ] 4860 bias_fns = [ 4861 lambda x, b: x + b, 4862 lambda x, b: b + x, 4863 ] 4864 fns = [ 4865 lambda x, y: x.clone(), 4866 lambda x, y: x + 3, 4867 lambda x, y: 3 * x, 4868 lambda x, y: x + y, 4869 lambda x, y: x * y, 4870 lambda x, y: abs(x), 4871 lambda x, y: x.abs(), 4872 lambda x, y: x.abs_(), 4873 lambda x, y: x.acos(), 4874 lambda x, y: x.acos_(), 4875 lambda x, y: x.add(y, alpha=3), 4876 lambda x, y: x.add_(y, alpha=3), 4877 lambda x, y: x.addcdiv(y, y, value=2), 4878 lambda x, y: x.addcdiv_(y, y, value=2), 4879 lambda x, y: x.addcmul(y, y, value=2), 4880 lambda x, y: x.addcmul_(y, y, value=2), 4881 lambda x, y: x.acosh(), 4882 lambda x, y: x.acosh_(), 4883 lambda x, y: x.asinh(), 4884 lambda x, y: x.asinh_(), 4885 lambda x, y: x.atanh(), 4886 lambda x, y: x.atanh_(), 4887 lambda x, y: x.asin(), 4888 lambda x, y: x.asin_(), 4889 lambda x, y: x.atan(), 4890 lambda x, y: x.atan2(y), 4891 lambda x, y: x.atan2_(y), 4892 lambda x, y: x.ceil(), 4893 lambda x, y: x.ceil_(), 4894 lambda x, y: x.clamp(-1, 1), 4895 lambda x, y: x.cos(), 4896 lambda x, y: x.cosh(), 4897 lambda x, y: x.div(0.5), 4898 lambda x, y: x.div_(0.5), 4899 lambda x, y: x.div(y), 4900 lambda x, y: x.div_(y), 4901 lambda x, y: x.digamma(), 4902 lambda x, y: x.digamma_(), 4903 lambda x, y: x.erf(), 4904 lambda x, y: x.erfc(), 4905 lambda x, y: x.erfinv(), 4906 lambda x, y: x.erfinv_(), 4907 lambda x, y: x.exp(), 4908 lambda x, y: x.expm1(), 4909 lambda x, y: x.expm1_(), 4910 lambda x, y: x.floor(), 4911 lambda x, y: x.floor_(), 4912 lambda x, y: x.fmod(2), 4913 lambda x, y: x.frac(), 4914 lambda x, y: x.hypot(y), 4915 lambda x, y: x.hypot_(y), 4916 lambda x, y: x.i0(), 4917 lambda x, y: x.i0_(), 4918 lambda x, y: x.lerp(y, 0.5), 4919 lambda x, y: x.log(), 4920 lambda x, y: x.log_(), 4921 lambda x, y: x.log10(), 4922 lambda x, y: x.log10_(), 4923 lambda x, y: x.log1p(), 4924 lambda x, y: x.log1p_(), 4925 lambda x, y: x.log2(), 4926 lambda x, y: x.log2_(), 4927 lambda x, y: x.mul(3), 4928 lambda x, y: x.mul_(3), 4929 lambda x, y: x.neg(), 4930 lambda x, y: x.neg_(), 4931 lambda x, y: x.pow(3), 4932 lambda x, y: x.pow_(3), 4933 lambda x, y: x.pow(0.0), 4934 lambda x, y: x.pow(1.0), 4935 lambda x, y: x.reciprocal(), 4936 lambda x, y: x.remainder(2), 4937 lambda x, y: x.round(), 4938 lambda x, y: x.round_(), 4939 lambda x, y: x.rsqrt(), 4940 lambda x, y: x.rsqrt_(), 4941 lambda x, y: x.sigmoid(), 4942 lambda x, y: x.sigmoid_(), 4943 lambda x, y: x.logit(), 4944 lambda x, y: x.logit_(), 4945 lambda x, y: x.logit(1e-6), 4946 lambda x, y: x.logit_(1e-6), 4947 lambda x, y: x.sign(), 4948 lambda x, y: x.sign_(), 4949 lambda x, y: x.sgn(), 4950 lambda x, y: x.sgn_(), 4951 lambda x, y: x.sin(), 4952 lambda x, y: x.sin_(), 4953 lambda x, y: x.sinh(), 4954 lambda x, y: x.sinh_(), 4955 lambda x, y: x.sqrt(), 4956 lambda x, y: x.sqrt_(), 4957 lambda x, y: x.tan(), 4958 lambda x, y: x.tanh(), 4959 
lambda x, y: x.trunc(), 4960 lambda x, y: x.trunc_(), 4961 _chunk_op, 4962 _unsqueeze_op_add, 4963 _unsqueeze_op_clone, 4964 ] 4965 x_c = x.contiguous() 4966 y_c = y.contiguous() 4967 b_c = bias.contiguous() 4968 for fn in fns: 4969 is_inplace = '_(' in inspect.getsource(fn) 4970 x_clone = x.clone() if is_inplace else x 4971 x_c_clone = x_c.clone() if is_inplace else x_c 4972 result_c = fn(x_c_clone, y_c) 4973 result = fn(x_clone, y) 4974 self.assertEqual(result, result_c, f"Failed for '{inspect.getsource(fn).strip()}'") 4975 self.assertTrue( 4976 result.is_contiguous(memory_format=memory_format), 4977 f"result of the '{inspect.getsource(fn).strip()}' is not in '{memory_format}' format") 4978 4979 for fn in bias_fns: 4980 result_c = fn(x_c, b_c) 4981 result = fn(x, bias) 4982 self.assertEqual(result, result_c, f"Failed for '{inspect.getsource(fn).strip()}'") 4983 self.assertTrue( 4984 result.is_contiguous(memory_format=memory_format), 4985 f"result of the '{inspect.getsource(fn).strip()}' is not in '{memory_format}' format") 4986 4987 for fn in return_contig_fns: 4988 result_c = fn(x_c, y_c) 4989 result = fn(x, y) 4990 self.assertEqual(result, result_c, f"Failed for '{inspect.getsource(fn).strip()}'") 4991 self.assertTrue( 4992 result.is_contiguous(memory_format=torch.contiguous_format), 4993 f"result of the '{inspect.getsource(fn).strip()}' is not in '{torch.contiguous_format}' format") 4994 4995 _test_helper( 4996 torch.randn((4, 3, 8, 8), device=device).contiguous(memory_format=torch.channels_last), 4997 abs(torch.randn((4, 3, 8, 8), device=device)) + 1, 4998 torch.randn((1, 3, 1, 1), device=device).contiguous(memory_format=torch.channels_last), 4999 torch.channels_last) 5000 _test_helper( 5001 torch.randn((4, 3, 8, 8, 8), device=device).contiguous(memory_format=torch.channels_last_3d), 5002 abs(torch.randn((4, 3, 8, 8, 8), device=device)) + 1, 5003 torch.randn((1, 3, 1, 1, 1), device=device).contiguous(memory_format=torch.channels_last_3d), 5004 torch.channels_last_3d) 5005 5006 # FIXME: make this a elementwise unary and elementwise binary OpInfo test 5007 def test_strides_propagation(self, device): 5008 def _test_helper(x, op, unary=False): 5009 def compare_strides(s1, s2, div): 5010 sdiv = [s // div for s in s1] 5011 self.assertEqual(sdiv, s2) 5012 5013 dim = x.dim() 5014 # we produce memory dense outputs, so when input is strided on the last dimension 5015 # we need to divide by that dimension stride to compare input and result strides 5016 div = x.stride(-1) 5017 for p in permutations(range(dim)): 5018 xp = x.permute(p) 5019 if not unary: 5020 y = torch.randn(xp.size(-1), device=x.device, dtype=x.dtype) 5021 for inputs in ((xp, xp), (xp, y), (y, xp)): 5022 res = op(*inputs) 5023 compare_strides(xp.stride(), res.stride(), div) 5024 self.assertEqual(xp.size(), res.size()) 5025 out = torch.empty(0, device=xp.device, dtype=res.dtype) 5026 res = op(*inputs, out=out) 5027 compare_strides(xp.stride(), res.stride(), div) 5028 self.assertEqual(xp.size(), res.size()) 5029 else: 5030 res = op(xp) 5031 compare_strides(xp.stride(), res.stride(), div) 5032 self.assertEqual(xp.size(), res.size()) 5033 out = torch.empty(0, device=xp.device, dtype=res.dtype) 5034 res = op(xp, out=out) 5035 compare_strides(xp.stride(), res.stride(), div) 5036 self.assertEqual(xp.size(), res.size()) 5037 5038 # torch.eq by default calls TensorIterator with defined output, torch.add with undefined 5039 binary_ops = (torch.eq, torch.add) 5040 unary_ops = (torch.exp,) 5041 # memory dense, sliced and ambiguous sliced 
(ambiguous dense loses permutation information)
        xs = (torch.randn(2, 3, 4, device=device), torch.randn(2, 3, 8, device=device)[:, :, ::2],
              torch.randn(1, 1, 4, 12, device=device)[:, :, :, ::2])
        for op in binary_ops:
            for x in xs:
                _test_helper(x, op)
        for op in unary_ops:
            for x in xs:
                _test_helper(x, op, unary=True)

    @onlyCUDA
    @unittest.skipIf(PYTORCH_CUDA_MEMCHECK, "is_pinned uses failure to detect pointer property")
    @skipIfTorchDynamo("NotImplementedError: PrimTorch does not support pinned memory")
    def test_pin_memory_from_constructor(self, device):
        def _get_like(t, **kwargs):
            return [
                torch.rand_like(t, **kwargs),
                torch.randn_like(t, **kwargs),
                torch.empty_like(t, **kwargs),
                torch.full_like(t, 4, **kwargs),
                torch.zeros_like(t, **kwargs),
                torch.ones_like(t, **kwargs),
            ]

        def _get_tensors(**kwargs):
            return [
                torch.tensor([10, 11], **kwargs),
                torch.randn(3, 5, **kwargs),
                torch.rand(3, **kwargs),
                # torch.randint(3, 5, **kwargs),  # unsupported
                torch.zeros(3, **kwargs),
                torch.randperm(3, **kwargs),
                torch.empty(6, **kwargs),
                torch.ones(6, **kwargs),
                torch.eye(6, **kwargs),
                torch.arange(3, 5, **kwargs)]

        pinned_tensors = _get_tensors(pin_memory=True) + _get_like(torch.empty(5, dtype=torch.float64), pin_memory=True)
        for x in pinned_tensors:
            self.assertTrue(x.is_pinned())

        tensors = _get_tensors() + _get_like(torch.empty(5, dtype=torch.float64, pin_memory=True))
        for x in tensors:
            self.assertFalse(x.is_pinned())

    @deviceCountAtLeast(1)
    @onlyCUDA
    def test_storage_all_devices(self, devices):
        for device in devices:
            t = torch.tensor((), device=device)
            self.assertEqual(t.dtype, t.storage().dtype)

    # Note [lazy_clone_ tests with inductor enabled]
    # These `lazy_clone_` tests are written in a way that makes them pass in
    # both eager mode and compiled mode (`PYTORCH_TEST_WITH_INDUCTOR=1`). There
    # are cases where COW tensors can materialize at different times and in
    # different ways in compiled mode versus eager mode, and those cases need
    # to be avoided. There are two main wrinkles to be aware of.
    #
    # The first wrinkle is that these tests have to check the internal
    # properties of tensors to make sure they materialize in the expected way,
    # and those checks cause dynamo graph breaks. Depending on the situation, a
    # graph break in-between two compiled graphs that operate on the same COW
    # tensor can make the tensor materialize when it would not materialize in
    # eager mode, causing the checks to fail. The strategy for avoiding this is
    # to make all the operations on COW tensors get compiled into the same
    # graph, by not doing any checks between the operations, and instead doing
    # all the checks at the end of the test. If we really do want to perform
    # checks between two operations, `op1` and `op2`, the solution is to create
    # two different tests. One test performs just `op1` and then checks. The
    # other test performs `op1` followed immediately by `op2` and then checks.
5112 # 5113 # The second wrinkle is that in eager mode, if we perform writes on two COW 5114 # tensors where one is a lazy clone of the other, the first tensor to be 5115 # written will be materialized with a new data pointer, and the second 5116 # tensor will just reuse the original data pointer when it is materialized. 5117 # But in compiled mode, if these writes happen in the same graph, the order 5118 # in which the tensors materialize can be different than in eager mode. So 5119 # in this case the strategy is to purposefully cause a graph break to happen 5120 # in-between the two write operations, by adding checks between them, so 5121 # that they have to materialize in the expected order. 5122 @skipXLA 5123 @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16)) 5124 def test_lazy_clone(self, device, dtype): 5125 t = torch.tensor([[0, 1], [2, 3]], device=device, dtype=dtype) 5126 t_orig_storage_addr = torch._C._storage_address(t) 5127 orig_data_ptr = torch._C._data_address(t) 5128 clone = t._lazy_clone() 5129 5130 # Lazy cloning a tensor should cause both it and its clone to become COW 5131 # tensors. They should have different storages, but the same data 5132 # pointer. 5133 5134 self.assertTrue(torch._C._is_cow_tensor(clone)) 5135 self.assertTrue(torch._C._is_cow_tensor(t)) 5136 5137 self.assertTrue(torch._C._storage_address(t) == t_orig_storage_addr) 5138 self.assertTrue(torch._C._storage_address(clone) != t_orig_storage_addr) 5139 5140 self.assertTrue(torch._C._data_address(t) == orig_data_ptr) 5141 self.assertTrue(torch._C._data_address(clone) == orig_data_ptr) 5142 5143 # See Note [lazy_clone_ tests with inductor enabled] 5144 @skipXLA 5145 @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16)) 5146 def test_lazy_clone_view(self, device, dtype): 5147 t = torch.tensor([[0, 1], [2, 3]], device=device, dtype=dtype) 5148 t_orig_storage_addr = torch._C._storage_address(t) 5149 orig_data_ptr = torch._C._data_address(t) 5150 clone = t._lazy_clone() 5151 view = t.view([4]) 5152 5153 # Viewing `t` should not cause a copy (materialize) to happen. All the 5154 # tensors should still be COW and have the same data pointer. `view` and 5155 # `t` should have the same storage, and `clone` should have a different 5156 # storage. 5157 5158 self.assertTrue(torch._C._is_cow_tensor(t)) 5159 self.assertTrue(torch._C._is_cow_tensor(view)) 5160 self.assertTrue(torch._C._is_cow_tensor(clone)) 5161 5162 self.assertTrue(torch._C._storage_address(t) == t_orig_storage_addr) 5163 self.assertTrue(torch._C._storage_address(view) == t_orig_storage_addr) 5164 self.assertTrue(torch._C._storage_address(clone) != t_orig_storage_addr) 5165 5166 self.assertTrue(torch._C._data_address(t) == orig_data_ptr) 5167 self.assertTrue(torch._C._data_address(clone) == orig_data_ptr) 5168 self.assertTrue(torch._C._data_address(view) == orig_data_ptr) 5169 5170 # See Note [lazy_clone_ tests with inductor enabled] 5171 @skipXLA 5172 @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16)) 5173 def test_lazy_clone_view_materialize(self, device, dtype): 5174 t = torch.tensor([[0, 1], [2, 3]], device=device, dtype=dtype) 5175 t_orig_storage_addr = torch._C._storage_address(t) 5176 orig_data_ptr = torch._C._data_address(t) 5177 clone = t._lazy_clone() 5178 view = t.view([4]) 5179 view += torch.ones(1, device=device, dtype=dtype) 5180 5181 # Writing to `t` should cause the storage under `t` and `view` to be 5182 # copied (materialized), but should not affect `clone`. 
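        # (The write above goes through `view`, which shares `t`'s storage, so
        # a single materialization is expected to cover both tensors.)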
5183 5184 self.assertFalse(torch._C._is_cow_tensor(t)) 5185 self.assertFalse(torch._C._is_cow_tensor(view)) 5186 self.assertTrue(torch._C._is_cow_tensor(clone)) 5187 5188 self.assertTrue(torch._C._storage_address(t) == t_orig_storage_addr) 5189 self.assertTrue(torch._C._storage_address(view) == t_orig_storage_addr) 5190 self.assertTrue(torch._C._storage_address(clone) != t_orig_storage_addr) 5191 5192 t_new_data_addr = torch._C._data_address(t) 5193 self.assertTrue(t_new_data_addr != orig_data_ptr) 5194 self.assertTrue(torch._C._data_address(view) == t_new_data_addr) 5195 self.assertTrue(torch._C._data_address(clone) == orig_data_ptr) 5196 5197 clone += torch.ones(1, device=device, dtype=dtype) 5198 5199 # Writing to `clone` should materialize it, so it should no longer 5200 # be COW. However, since `clone`'s storage is the only COW storage 5201 # left that holds a reference to the original data pointer, this 5202 # materialization should not actually cause a copy--it should 5203 # just reuse the original data pointer. 5204 5205 self.assertFalse(torch._C._is_cow_tensor(t)) 5206 self.assertFalse(torch._C._is_cow_tensor(view)) 5207 self.assertFalse(torch._C._is_cow_tensor(clone)) 5208 5209 self.assertTrue(torch._C._storage_address(t) == t_orig_storage_addr) 5210 self.assertTrue(torch._C._storage_address(view) == t_orig_storage_addr) 5211 self.assertTrue(torch._C._storage_address(clone) != t_orig_storage_addr) 5212 5213 self.assertTrue(torch._C._data_address(t) == t_new_data_addr) 5214 self.assertTrue(torch._C._data_address(view) == t_new_data_addr) 5215 self.assertTrue(torch._C._data_address(clone) == orig_data_ptr) 5216 5217 @skipXLA 5218 @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16)) 5219 def test_lazy_clone_binary_op_no_materialize(self, device, dtype): 5220 t = torch.tensor([[0, 1], [2, 3]], device=device, dtype=dtype) 5221 clone = t._lazy_clone() 5222 res = t + clone 5223 self.assertTrue(torch._C._is_cow_tensor(t)) 5224 self.assertTrue(torch._C._is_cow_tensor(clone)) 5225 5226 # This tests that if a COW materialization is attempted inside an 5227 # `at::parallel_for` loop function, then an error is raised. This test is 5228 # implemented in Python rather than C++ because the C++ tests are built 5229 # without multithreading support in `at::parallel_for`. 
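    # Judging by the cases exercised below, any materialization performed from
    # inside the parallel region raises "Materializing a storage"; only the
    # runs where no parallel task touches the COW tensor (num_parallel == 1
    # combined with skip_first == True) are expected to succeed.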
5230 @skipXLA 5231 @skipIfTorchDynamo("Torchdynamo fails and we do not need to test it here anyway") 5232 @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16)) 5233 def test_parallel_cow_materialize_error(self, device, dtype): 5234 5235 def run(num_threads, num_parallel, skip_first, should_error): 5236 orig_num_threads = torch.get_num_threads() 5237 5238 try: 5239 torch.set_num_threads(num_threads) 5240 5241 a = torch.tensor([[0, 1], [2, 3]], device=device, dtype=dtype)._lazy_clone() 5242 5243 if should_error: 5244 with self.assertRaisesRegex(RuntimeError, r'Materializing a storage'): 5245 torch._test_parallel_materialize( 5246 a, num_parallel, skip_first) 5247 else: 5248 torch._test_parallel_materialize(a, num_parallel, skip_first) 5249 5250 # Error should not raise in any case if the tensor is not COW 5251 b = torch.tensor([[0, 1], [2, 3]], device=device, dtype=dtype) 5252 torch._test_parallel_materialize(b, num_parallel, skip_first) 5253 5254 finally: 5255 torch.set_num_threads(orig_num_threads) 5256 5257 run(1, 1, False, True) 5258 run(1, 1, True, False) 5259 run(1, 10, False, True) 5260 run(1, 10, True, True) 5261 run(10, 1, False, True) 5262 run(10, 1, True, False) 5263 run(10, 10, False, True) 5264 run(10, 10, True, True) 5265 run(10, 2, False, True) 5266 run(10, 2, True, True) 5267 5268 # FIXME: move to test distributions 5269 @skipIfMps 5270 @dtypesIfCUDA(torch.float, torch.double, torch.half) 5271 @dtypes(torch.float, torch.double, torch.half) 5272 def test_multinomial(self, device, dtype): 5273 def make_prob_dist(shape, is_contiguous): 5274 if is_contiguous: 5275 if dtype == torch.half: 5276 return torch.zeros(shape, device=device).uniform_().to(dtype=torch.half) 5277 return torch.zeros(shape, device=device, dtype=dtype).uniform_() 5278 elif len(shape) == 1: 5279 if dtype == torch.half: 5280 return torch.zeros((shape + [5]), device=device).uniform_().to(dtype=torch.half)[:, 2] 5281 return torch.zeros((shape + [5]), device=device, dtype=dtype).uniform_()[:, 2] 5282 else: 5283 # num dim = 2 5284 new_shape = [2, shape[1], 7, 1, shape[0], 1, 10] 5285 if dtype == torch.half: 5286 prob_dist = torch.zeros(new_shape, device=device).uniform_().to(dtype=torch.half) 5287 else: 5288 prob_dist = torch.zeros(new_shape, device=device, dtype=dtype).uniform_() 5289 prob_dist = prob_dist.transpose(1, 4) 5290 prob_dist = prob_dist[1, :, 5, 0, :, 0, 4] 5291 assert not prob_dist.is_contiguous() # sanity check 5292 return prob_dist 5293 5294 for is_contiguous in (True, False): 5295 # with replacement 5296 n_row = 3 5297 for n_col in range(4, 5 + 1): 5298 prob_dist = make_prob_dist([n_row, n_col], is_contiguous) 5299 # indices that shouldn't be sampled (<0 means none) 5300 zero_prob_indices = torch.LongTensor(n_row).random_(-2, n_col).tolist() 5301 for i, j in enumerate(zero_prob_indices): 5302 if j >= 0: 5303 prob_dist[i, j] = 0 5304 n_sample = n_col * 3 5305 sample_indices = torch.multinomial(prob_dist, n_sample, True) 5306 self.assertEqual(prob_dist.dim(), 2) 5307 self.assertEqual(sample_indices.size(1), n_sample) 5308 for i in range(n_row): 5309 zero_prob_idx = zero_prob_indices[i] 5310 if zero_prob_idx < 0: 5311 continue 5312 for j in range(n_sample): 5313 self.assertNotEqual(sample_indices[i, j], zero_prob_idx, 5314 msg="sampled an index with zero probability") 5315 5316 # without replacement 5317 n_row = 3 5318 for n_col in range(2, 10 + 1, 2): 5319 prob_dist = make_prob_dist([n_row, n_col], is_contiguous) 5320 # indices that shouldn't be sampled (<0 means none) 5321 
                zero_prob_indices = torch.LongTensor(n_row).random_(-1, n_col).tolist()
                for i, j in enumerate(zero_prob_indices):
                    if j >= 0:
                        prob_dist[i, j] = 0
                n_sample = max(1, n_col - 2)
                sample_indices = torch.multinomial(prob_dist, n_sample, False)
                self.assertEqual(prob_dist.dim(), 2)
                self.assertEqual(sample_indices.size(1), n_sample)
                for i in range(n_row):
                    row_samples = {}
                    zero_prob_idx = zero_prob_indices[i]
                    for j in range(n_sample):
                        sample_idx = sample_indices[i, j]
                        if zero_prob_idx >= 0:
                            self.assertNotEqual(sample_idx, zero_prob_idx,
                                                msg="sampled an index with zero probability")
                        self.assertNotIn(sample_idx, row_samples, "sampled an index twice")
                        row_samples[sample_idx] = True

            # vector
            n_col = 4
            prob_dist = make_prob_dist([n_col], is_contiguous).fill_(1)
            zero_prob_idx = 1  # index that shouldn't be sampled
            prob_dist[zero_prob_idx] = 0
            n_sample = 20
            sample_indices = torch.multinomial(prob_dist, n_sample, True)
            for sample_index in sample_indices:
                self.assertNotEqual(sample_index, zero_prob_idx, msg="sampled an index with zero probability")
            self.assertEqual(sample_indices.dim(), 1, msg="wrong number of dimensions")
            self.assertEqual(prob_dist.dim(), 1, msg="wrong number of prob_dist dimensions")
            self.assertEqual(sample_indices.size(0), n_sample, msg="wrong number of samples")

            # CUDA misalignment issue (#46702)
            n_row, n_col = 2, 3
            prob_dist = make_prob_dist([n_row, n_col], True)
            n_sample = 1
            sample_indices = torch.multinomial(prob_dist, n_sample, True)
            self.assertEqual(sample_indices.dim(), 2, msg="wrong number of dimensions")
            self.assertEqual(sample_indices.size(1), n_sample, msg="wrong number of samples")

    # FIXME: move to test distributions
    @onlyCUDA
    @dtypes(torch.float, torch.double, torch.half)
    def test_multinomial_deterministic(self, device, dtype):
        gen = torch.Generator(device=device)

        trials = 5
        seed = 0
        prob_dist = torch.rand(10000, 1000, device=device, dtype=dtype)
        n_sample = 1

        for i in range(trials):
            gen.manual_seed(seed)
            samples_1 = torch.multinomial(prob_dist, n_sample, True, generator=gen)

            gen.manual_seed(seed)
            samples_2 = torch.multinomial(prob_dist, n_sample, True, generator=gen)

            self.assertEqual(samples_1, samples_2)
            self.assertEqual(samples_1.dim(), 2, msg="wrong number of dimensions")
            self.assertEqual(samples_1.size(1), n_sample, msg="wrong number of samples")

    # FIXME: move to test distributions
    @slowTest
    @dtypes(torch.float)
    def test_multinomial_rng_state_advance(self, device, dtype):
        corpus_size = 100000
        freqs = torch.ones(corpus_size, dtype=torch.float, device=device)
        n_sample = 100
        samples1 = torch.multinomial(freqs, n_sample, replacement=True)
        samples2 = torch.multinomial(freqs, n_sample, replacement=True)
        samples = torch.cat([samples1, samples2])
        # expect at most 2 repeated elements generated in the 2 attempts;
        # the probability of at least one element being repeated is
        # surprisingly large, about 18% (birthday problem: the chance of a
        # collision among n = 200 draws from N = 100000 values is roughly
        # 1 - exp(-n * (n - 1) / (2 * N)) ~= 0.18)
        self.assertLessEqual(2 * n_sample - samples.unique().size(0), 2)
        samples1 = torch.multinomial(freqs, n_sample, replacement=False)
        samples2 = torch.multinomial(freqs, n_sample, replacement=False)
        samples = torch.cat([samples1, samples2])
        # expect no more than 1 repeated element
generated in 2 attempts 5401 self.assertLessEqual(2 * n_sample - samples.unique().size(0), 1) 5402 5403 def _test_memory_format_transformations(self, device, input_generator_fn, transformation_fn, 5404 memory_format, compare_data=True, default_is_preserve=False): 5405 5406 assert memory_format == torch.channels_last or memory_format == torch.channels_last_3d 5407 5408 # xc is a channels last tensor 5409 xc = input_generator_fn(device) 5410 # xc is not memory dense, but looks like channels last 5411 # We don't preserve non-dense striding 5412 if not TEST_WITH_TORCHINDUCTOR: 5413 if memory_format == torch.channels_last: 5414 xc = xc[..., ::2, ::2] 5415 else: 5416 xc = xc[..., ::2, ::2, ::2] 5417 5418 clone = transformation_fn(xc, memory_format=torch.preserve_format) 5419 5420 5421 self.assertFalse(clone.is_contiguous()) 5422 self.assertTrue(clone.is_contiguous(memory_format=memory_format)) 5423 if not TEST_WITH_TORCHINDUCTOR: 5424 self.assertFalse(xc.is_contiguous()) 5425 self.assertFalse(xc.is_contiguous(memory_format=memory_format)) 5426 if compare_data: 5427 self.assertEqual(xc, clone.to(xc)) 5428 5429 xc = input_generator_fn(device) 5430 clone = transformation_fn(xc, memory_format=torch.contiguous_format) 5431 self.assertTrue(clone.is_contiguous()) 5432 self.assertFalse(clone.is_contiguous(memory_format=memory_format)) 5433 if compare_data: 5434 self.assertEqual(xc, clone.to(xc)) 5435 5436 xc = input_generator_fn(device) 5437 clone = transformation_fn(xc) 5438 5439 if default_is_preserve: 5440 self.assertFalse(clone.is_contiguous()) 5441 self.assertTrue(clone.is_contiguous(memory_format=memory_format)) 5442 else: 5443 self.assertTrue(clone.is_contiguous()) 5444 self.assertFalse(clone.is_contiguous(memory_format=memory_format)) 5445 if compare_data: 5446 self.assertEqual(xc, clone.to(xc)) 5447 5448 # TODO copy _like constructors to stride permutation instead of just layout 5449 if not TEST_WITH_TORCHINDUCTOR: 5450 x = torch.randn((3, 4, 5, 6, 7, 8, 9), device=device) 5451 for i in range(10): 5452 permutation = list(range(len(x.shape))) 5453 random.shuffle(permutation) 5454 x = x.permute(permutation) 5455 self.assertEqual(x.stride(), transformation_fn(x, memory_format=torch.preserve_format).stride()) 5456 5457 def test_memory_format_to(self, device): 5458 def get_generator(memory_format, shape): 5459 def input_generator_fn(device): 5460 return torch.randn(shape, device=device, dtype=torch.float32).contiguous(memory_format=memory_format) 5461 return input_generator_fn 5462 5463 def transformation_fn(tensor, **kwargs): 5464 return tensor.to(dtype=torch.float64, **kwargs) 5465 5466 formats_shapes = ( 5467 (torch.channels_last, (4, 3, 8, 8)), 5468 (torch.channels_last_3d, (4, 3, 8, 8, 8))) 5469 5470 for mf, shape in formats_shapes: 5471 self._test_memory_format_transformations( 5472 device, get_generator(mf, shape), transformation_fn, mf, default_is_preserve=True) 5473 5474 def test_memory_format_type(self, device): 5475 def get_generator(memory_format, shape): 5476 def input_generator_fn(device): 5477 return torch.randn(shape, device=device, dtype=torch.float32).contiguous(memory_format=memory_format) 5478 return input_generator_fn 5479 5480 def transformation_fn(tensor, **kwargs): 5481 return tensor.to(torch.float64, **kwargs) 5482 5483 formats_shapes = ( 5484 (torch.channels_last, (4, 3, 8, 8)), 5485 (torch.channels_last_3d, (4, 3, 8, 8, 8))) 5486 5487 for mf, shape in formats_shapes: 5488 self._test_memory_format_transformations( 5489 device, get_generator(mf, shape), transformation_fn, 
mf, default_is_preserve=True) 5490 5491 def test_memory_format_clone(self, device): 5492 def get_generator(memory_format, shape): 5493 def input_generator_fn(device): 5494 return torch.randn(shape, device=device, dtype=torch.float32).contiguous(memory_format=memory_format) 5495 return input_generator_fn 5496 5497 def transformation_fn(tensor, **kwargs): 5498 return tensor.clone(**kwargs) 5499 5500 formats_shapes = ( 5501 (torch.channels_last, (4, 3, 8, 8)), 5502 (torch.channels_last_3d, (4, 3, 8, 8, 8))) 5503 5504 for mf, shape in formats_shapes: 5505 self._test_memory_format_transformations( 5506 device, get_generator(mf, shape), transformation_fn, mf, True, default_is_preserve=True) 5507 5508 def test_memory_format_factory_like_functions_preserve(self, device): 5509 def get_generator(memory_format, shape): 5510 def input_generator_fn(device): 5511 return torch.randn(shape, device=device, dtype=torch.float32).contiguous(memory_format=memory_format) 5512 return input_generator_fn 5513 5514 transformation_fns = [ 5515 lambda t, **kwargs: torch.zeros_like(t, **kwargs), 5516 lambda t, **kwargs: torch.ones_like(t, **kwargs), 5517 lambda t, **kwargs: torch.randint_like(t, 10, 100, **kwargs), 5518 lambda t, **kwargs: torch.randint_like(t, 100, **kwargs), 5519 lambda t, **kwargs: torch.randn_like(t, **kwargs), 5520 lambda t, **kwargs: torch.rand_like(t, **kwargs), 5521 lambda t, **kwargs: torch.full_like(t, 7, **kwargs), 5522 lambda t, **kwargs: torch.empty_like(t, **kwargs)] 5523 5524 formats_shapes = ( 5525 (torch.channels_last, (4, 3, 8, 8)), 5526 (torch.channels_last_3d, (4, 3, 8, 8, 8))) 5527 5528 for mf, shape, in formats_shapes: 5529 for transformation_fn in transformation_fns: 5530 self._test_memory_format_transformations( 5531 device, get_generator(mf, shape), transformation_fn, mf, compare_data=False, default_is_preserve=True) 5532 5533 def test_memory_format_type_shortcuts(self, device): 5534 def get_generator(memory_format, shape, dtype): 5535 def input_generator_fn(device): 5536 return torch.randn(shape, device=device, dtype=dtype).clamp(0, 1) \ 5537 .round().contiguous(memory_format=memory_format) 5538 return input_generator_fn 5539 5540 5541 def get_fn(fn_name): 5542 def transformation_fn(tensor, **kwargs): 5543 fn = getattr(tensor, fn_name) 5544 return fn(**kwargs) 5545 return transformation_fn 5546 5547 shortcuts = ['byte', 'char', 'double', 'bool', 'half', 'int', 'long', 'short'] 5548 if device == 'cpu': 5549 shortcuts += ['bfloat16'] 5550 5551 formats_shapes = ( 5552 (torch.channels_last, (4, 3, 8, 8)), 5553 (torch.channels_last_3d, (4, 3, 8, 8, 8))) 5554 5555 for mf, shape in formats_shapes: 5556 for fn_name in shortcuts: 5557 self._test_memory_format_transformations( 5558 device, get_generator(mf, shape, torch.float32), get_fn(fn_name), mf, default_is_preserve=True) 5559 5560 # Test 'float' separately to avoid float->float no-op. 
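        # (Likely rationale: .float() on an input that is already float32 can
        # return the input unchanged, bypassing the copy whose memory format
        # the helper inspects; starting from float64 forces a real conversion.)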
5561 for mf, shape in formats_shapes: 5562 self._test_memory_format_transformations( 5563 device, get_generator(mf, shape, torch.float64), get_fn('float'), mf, default_is_preserve=True) 5564 5565 @onlyCUDA 5566 def test_memory_format_cpu_and_cuda_ops(self, device): 5567 def get_generator(memory_format, shape): 5568 def input_generator_fn(device): 5569 return torch.randn(shape, device=device, dtype=torch.float32).contiguous(memory_format=memory_format) 5570 return input_generator_fn 5571 5572 def transformation_cpu_fn(tensor, **kwargs): 5573 return tensor.cpu(**kwargs) 5574 5575 def transformation_cuda_fn(tensor, **kwargs): 5576 return tensor.cuda(**kwargs) 5577 5578 formats_shapes = ( 5579 (torch.channels_last, (4, 3, 8, 8)), 5580 (torch.channels_last_3d, (4, 3, 8, 8, 8))) 5581 5582 for mf, shape in formats_shapes: 5583 self._test_memory_format_transformations( 5584 'cuda', get_generator(mf, shape), transformation_cpu_fn, mf, default_is_preserve=True) 5585 self._test_memory_format_transformations( 5586 'cpu', get_generator(mf, shape), transformation_cuda_fn, mf, default_is_preserve=True) 5587 5588 # FIXME: move to test_serialization 5589 @onlyNativeDeviceTypes 5590 def test_pickle_gradscaler(self, device): 5591 # This test should pass in 3 cases for cuda: 5592 # 1. cuda is not available. 5593 # 2. cuda is available but device is not cuda. 5594 # 3. cuda is available and device is cuda. 5595 # In case 1, a and b disable themselves on construction and shouldn't try to pickle workhorse attributes. 5596 # In case 2, a and b are enabled. Workhorse attributes participate in pickling, but none are lazy-inited 5597 # to cuda Tensors, because I don't want to do cuda things if device is not cuda. 5598 # In case 3, a and b are enabled and we may also try lazy-initing _scale to a cuda tensor. 5599 device = torch.device(device) 5600 try_lazy_inits = (True, False) 5601 GradScaler = partial(torch.GradScaler, device=device.type) 5602 for lazy_init_scale in try_lazy_inits: 5603 a = GradScaler(init_scale=3., growth_factor=4., backoff_factor=.5, growth_interval=2) 5604 if device.type == "cuda": 5605 self.assertTrue(not a.is_enabled() if torch.cuda.amp.common.amp_definitely_not_available() else a.is_enabled()) 5606 else: 5607 self.assertTrue(a.is_enabled()) 5608 if lazy_init_scale: 5609 # Dummy a.scale() call lazy-inits a._scale Tensor. 5610 a.scale(torch.tensor([4.0], dtype=torch.float32, device=device)) 5611 self.assertTrue(a._scale.device.type == device.type) 5612 # The following three lines should work whether or not cuda is available. 5613 serialized = pickle.dumps(a) 5614 b = pickle.loads(serialized) 5615 self.assertEqual(b.is_enabled(), a.is_enabled()) 5616 if a.is_enabled(): 5617 self.assertEqual(b.get_scale(), 3.) 5618 self.assertEqual(b.get_growth_factor(), 4.) 
5619 self.assertEqual(b.get_backoff_factor(), .5) 5620 self.assertEqual(b.get_growth_interval(), 2) 5621 self.assertEqual(b._init_growth_tracker, 0) 5622 # supplies a dummy key to test the defaultdict's default_factory 5623 self.assertEqual(b._per_optimizer_states["fdsa"], 5624 torch.amp.grad_scaler._refresh_per_optimizer_state()) 5625 if lazy_init_scale: 5626 self.assertEqual(b.scale(torch.tensor([4.0], dtype=torch.float32, device=device)), 12.0) 5627 5628 # FIXME: move to test distributions 5629 def _test_multinomial_empty(self, device, replacement, num_samples): 5630 probs = torch.ones(0, 3, device=device) 5631 expected = torch.empty(0, num_samples, dtype=torch.int64) 5632 out = torch.multinomial(probs, num_samples=num_samples, replacement=replacement) 5633 self.assertEqual(out, expected) 5634 5635 # FIXME: move to test distributions 5636 def test_multinomial_empty_w_replacement(self, device): 5637 self._test_multinomial_empty(device, True, 1) 5638 self._test_multinomial_empty(device, True, 2) 5639 5640 # FIXME: move to test distributions 5641 def test_multinomial_empty_wo_replacement(self, device): 5642 self._test_multinomial_empty(device, False, 1) 5643 self._test_multinomial_empty(device, False, 2) 5644 5645 @onlyNativeDeviceTypes 5646 @dtypes(torch.float, torch.double) 5647 def test_grad_scaling_unscale(self, device, dtype): 5648 device = torch.device(device) 5649 device0 = "cuda:0" if device.type == "cuda" else "cpu" 5650 inv_scale = torch.full((1,), 0.25, dtype=torch.float, device=device0) 5651 found_inf = torch.full((1,), 0.0, dtype=torch.float, device=device0) 5652 5653 size = 20 5654 g = torch.full((size, size), 4.0, dtype=dtype, device=device0) 5655 ginf = g.clone() 5656 ginf[2, 2] = float('inf') 5657 gnan = g.clone() 5658 gnan[2, 2] = float('nan') 5659 5660 # Tries selected combinations of 5661 # - contiguous grads 5662 # - g.clone().t() which is not contiguous but still non overlapping and dense 5663 # - variants of g.clone()[:, :5] which are not non overlapping and dense 5664 # Non overlapping and dense grads route into a multi tensor apply kernel, 5665 # others use a fallback per-tensor kernel, so we should try both. 5666 cases = ( 5667 ([g.clone(), g.clone()], False), 5668 ([g.clone(), g.clone().t()], False), 5669 ([g.clone(), g.clone()[:, :5]], False), 5670 ([g.clone()[:, :5], g.clone()[:, :5]], False), 5671 ([g.clone(), ginf.clone()], True), 5672 ([g.clone(), gnan.clone()], True), 5673 ([g.clone(), ginf.clone()[:, :5]], True), 5674 ([g.clone(), gnan.clone()[:, :5]], True), 5675 ([ginf.clone(), g.clone()[:, :5]], True), 5676 ([ginf.clone()[:, :5], g.clone()[:, :5]], True), 5677 ) 5678 5679 for grads, has_inf in cases: 5680 found_inf.zero_() 5681 torch._amp_foreach_non_finite_check_and_unscale_(grads, found_inf, inv_scale) 5682 if has_inf: 5683 self.assertEqual(found_inf, 1.0) 5684 else: 5685 self.assertEqual(found_inf, 0.0) 5686 for grad in grads: 5687 self.assertEqual(grad, torch.ones_like(grad), rtol=1e-5, atol=1e-7) 5688 5689 # When passing lists with mismatched dtypes to a raw 5690 # _amp_foreach_non_finite_check_and_unscale_ call on CUDA, 5691 # it's expected to fall back to single-tensor TensorIterator kernel. 
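        # (Reminder: g is filled with 4.0 and inv_scale is 0.25, so every
        # correctly unscaled grad should be exactly 1.0, which is what the
        # torch.ones_like comparisons in this test check.)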
5692 grads = [g.clone(), g.to(dtype=torch.float16)] 5693 torch._amp_foreach_non_finite_check_and_unscale_(grads, found_inf, inv_scale) 5694 for grad in grads: 5695 self.assertEqual(grad, torch.ones_like(grad), rtol=1e-5, atol=1e-7) 5696 5697 # Passing lists with mismatched devices to a raw 5698 # _amp_foreach_non_finite_check_and_unscale_ call should raise errors. 5699 if device.type == "cuda" and TEST_MULTIGPU: 5700 with self.assertRaisesRegex(RuntimeError, r"Expected all tensors to be on the same device"): 5701 torch._amp_foreach_non_finite_check_and_unscale_([g.clone(), g.to(device="cuda:1")], 5702 found_inf, 5703 inv_scale) 5704 5705 # Creates a list of grads with mismatched dtypes and devices, to ensure 5706 # scaler._unscale_grads_ organizes grads by dtype and device before calling 5707 # _amp_foreach_non_finite_check_and_unscale_ on each set. 5708 # If inject_inf >= 0, writes an inf into one grad for _unscale_grads_ to find. 5709 def perfect_storm_grads(inject_inf): 5710 grads = [g.clone(), g.clone()[:, :5], g.to(dtype=torch.float16), g.to(dtype=torch.float16)] 5711 if device.type == "cuda" and TEST_MULTIGPU: 5712 grads += [g.to(device="cuda:1"), 5713 g.to(device="cuda:1")[:, :5], 5714 g.to(device="cuda:1", dtype=torch.float16), 5715 g.to(device="cuda:1", dtype=torch.float16)] 5716 if inject_inf >= 0: 5717 grads[inject_inf][2, 2] = float('inf') 5718 return grads 5719 5720 GradScaler = partial(torch.GradScaler, device=device.type) 5721 scaler = GradScaler() 5722 dummy_params = [torch.empty_like(g) for g in perfect_storm_grads(-1)] 5723 dummy_opt = torch.optim.SGD(dummy_params, lr=1.) 5724 5725 # Ensures the inf/nan checking can find an inf injected onto any grad in the perfect storm. 5726 for inject_inf in range(-1, len(dummy_params)): 5727 found_inf = torch.full((1,), 0.0, dtype=torch.float, device=device0) 5728 grads = perfect_storm_grads(inject_inf) 5729 for i, p in enumerate(dummy_params): 5730 p.grad = grads[i] 5731 found_inf_per_device = scaler._unscale_grads_(dummy_opt, inv_scale, found_inf, True) 5732 if inject_inf < 0: 5733 # No inf was injected, ensures unscaling worked normally. 5734 self.assertTrue(sum(v.item() for v in found_inf_per_device.values()) == 0) 5735 for grad in grads: 5736 self.assertEqual(grad, torch.ones_like(grad), rtol=1e-5, atol=1e-7) 5737 else: 5738 # inf was injected, ensures inf was found. 
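                # (exactly one per-device counter should be set, since only a
                # single grad was poisoned)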
5739 self.assertTrue(sum(v.item() for v in found_inf_per_device.values()) == 1) 5740 5741 @onlyNativeDeviceTypes 5742 @dtypes(torch.float) 5743 def test_grad_scaling_update_scale(self, device, dtype): 5744 growth = 2.0 5745 backoff = 0.25 5746 growth_interval = 2 5747 scale = torch.full((1,), 4.0, dtype=dtype, device=device) 5748 growth_tracker = torch.full((1,), 0.0, dtype=torch.int32, device=device) 5749 found_inf = torch.full((1,), 0.0, dtype=torch.float, device=device) 5750 5751 # Simulates 2 consecutive unskipped iterations 5752 torch._amp_update_scale_(scale, growth_tracker, found_inf, growth, backoff, growth_interval) 5753 self.assertEqual(growth_tracker, 1) 5754 self.assertEqual(scale, 4.0) 5755 torch._amp_update_scale_(scale, growth_tracker, found_inf, growth, backoff, growth_interval) 5756 self.assertEqual(growth_tracker, 0) 5757 self.assertEqual(scale, 8.0) 5758 5759 # Simulates a skipped iteration 5760 found_inf.fill_(1.0) 5761 torch._amp_update_scale_(scale, growth_tracker, found_inf, growth, backoff, growth_interval) 5762 self.assertEqual(growth_tracker, 0) 5763 self.assertEqual(scale, 2.0) 5764 5765 @skipIfTorchDynamo("Failed running call_function for sparse_coo_tensor. See https://github.com/pytorch/pytorch/issues/118856") 5766 @onlyNativeDeviceTypes 5767 @dtypes(torch.float) 5768 def test_grad_scaling_unscale_sparse(self, device, dtype): 5769 device = torch.device(device) 5770 scaler = torch.GradScaler(device=device.type) 5771 5772 inv_scale = torch.full((1,), 0.25, dtype=dtype, device=device) 5773 found_inf = torch.empty((1,), dtype=dtype, device=device) 5774 cur = found_inf.device 5775 5776 i = torch.tensor([[0, 1, 1], 5777 [2, 0, 2]], device=device, dtype=torch.int64) 5778 v = torch.tensor([16., 32., 64.], device=device, dtype=torch.float) 5779 s = torch.sparse_coo_tensor(i, v, torch.Size([2, 3]), device=device, dtype=dtype) 5780 5781 p = s.clone() 5782 assert p.is_sparse 5783 opt = torch.optim.SGD([p], lr=1.) 5784 5785 p.grad = s.clone() 5786 found_inf.zero_() 5787 found_inf = scaler._unscale_grads_(opt, inv_scale, found_inf, False)[cur] 5788 self.assertEqual(found_inf, 0.0) 5789 self.assertEqual(p.grad.to_dense(), (s / 4).to_dense()) 5790 5791 v = torch.FloatTensor([16., 32., float('inf')]) 5792 p.grad = torch.sparse_coo_tensor(i, v, torch.Size([2, 3]), device=device, dtype=dtype) 5793 found_inf.zero_() 5794 found_inf = scaler._unscale_grads_(opt, inv_scale, found_inf, False)[cur] 5795 self.assertEqual(found_inf, 1.0) 5796 5797 v = torch.FloatTensor([16., 32., float('nan')]) 5798 p.grad = torch.sparse_coo_tensor(i, v, torch.Size([2, 3]), device=device, dtype=dtype) 5799 found_inf.zero_() 5800 found_inf = scaler._unscale_grads_(opt, inv_scale, found_inf, False)[cur] 5801 self.assertEqual(found_inf, 1.0) 5802 5803 p = s.clone().half() 5804 assert p.is_sparse 5805 opt = torch.optim.SGD([p], lr=1.) 5806 5807 p.grad = s.clone().half() 5808 found_inf.zero_() 5809 found_inf = scaler._unscale_grads_(opt, inv_scale, found_inf, True)[cur] 5810 self.assertEqual(found_inf, 0.0) 5811 self.assertEqual(p.grad.to_dense(), (s.half() / 4).to_dense()) 5812 5813 # Creates fp16 sparse tensor with duplicated indices (uncoalesced). The uncoalesced representation 5814 # does not overflow in fp16, but the coalesced representation would, because 64000 + 64000 > fp16 max. 5815 # _amp_non_finite_check_and_unscale_ should report an overflow here. 
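        # (Concretely: the index (0, 2) appears twice below, so coalescing would sum
        # 64000. + 64000. = 128000., which exceeds the fp16 maximum of 65504.)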
5816 i = torch.LongTensor([[0, 1, 0], 5817 [2, 0, 2]]) 5818 v = torch.FloatTensor([64000., 32., 64000.]) 5819 p.grad = torch.sparse_coo_tensor(i, v, torch.Size([2, 3]), device=device, dtype=torch.float16) 5820 found_inf.zero_() 5821 found_inf = scaler._unscale_grads_(opt, inv_scale, found_inf, True)[cur] 5822 self.assertEqual(found_inf, 1.0) 5823 5824 @onlyNativeDeviceTypes 5825 def test_grad_scaling_state_dict(self, device): 5826 device = torch.device(device) 5827 GradScaler = partial(torch.GradScaler, device=device.type) 5828 for lazy_init_scale in True, False: 5829 s0 = GradScaler(init_scale=3., growth_factor=4., backoff_factor=.5, growth_interval=2) 5830 s1 = GradScaler(init_scale=6., growth_factor=7., backoff_factor=.8, growth_interval=1) 5831 5832 # sets a random value for load_state_dict to overwrite 5833 s1._init_growth_tracker = 7 5834 5835 if lazy_init_scale: 5836 # Dummy scale() call to ensure the scale tensor is lazily initialized. 5837 s1.scale(torch.full((1,), 4.0, dtype=torch.float32, device=device)) 5838 if "cuda" == device.type: 5839 self.assertTrue(isinstance(s1._scale, torch.cuda.FloatTensor)) 5840 else: 5841 self.assertTrue(isinstance(s1._scale, torch.FloatTensor)) 5842 5843 s1.load_state_dict(s0.state_dict()) 5844 5845 self.assertEqual(s1.get_scale(), 3.) 5846 self.assertEqual(s1.get_growth_factor(), 4.) 5847 self.assertEqual(s1.get_backoff_factor(), .5) 5848 self.assertEqual(s1.get_growth_interval(), 2) 5849 self.assertEqual(s1._init_growth_tracker, 0) 5850 5851 # _run_scaling_case generalizes some single-optimizer test logic to avoid too much copy-pasting below. 5852 def _run_scaling_case(self, device, run, unskipped, skipped, atol=1e-7, optimizer_ctor=torch.optim.SGD, optimizer_kwargs=None): 5853 # Ensure scaling can be disabled without changing user control flow. 5854 for enabled in True, False: 5855 ( 5856 mod_control, mod_scaling, opt_control, opt_scaling, data, loss_fn, skip_iter, 5857 ) = _create_scaling_case(device=device, optimizer_ctor=optimizer_ctor, optimizer_kwargs=optimizer_kwargs) 5858 5859 # For functionality, test with a modest initial scale, and an unrealistically-large growth factor 5860 # so any potential errors with the growth factor handling will be magnified. 5861 GradScaler = partial(torch.GradScaler, device=device) 5862 scaler = GradScaler(init_scale=128., growth_factor=2.0, enabled=enabled, growth_interval=1) 5863 5864 _ = run(device, data, mod_control, opt_control, scaler, loss_fn, skip_iter, False) 5865 ret = run(device, data, mod_scaling, opt_scaling, scaler, loss_fn, skip_iter, True) 5866 5867 # Allows run() to optionally return a different scaler instance. 5868 scaler = ret if ret else scaler 5869 5870 # If scaling was enabled, the scale factor should have been multiplied by the growth factor 5871 # len(data) - skipped times and the backoff factor "skipped" times. 5872 if enabled: 5873 net_growth = scaler.get_growth_factor()**unskipped if unskipped > 0 else 1.0 5874 net_backoff = scaler.get_backoff_factor()**skipped if skipped > 0 else 1.0 5875 self.assertTrue(scaler.get_scale() == (128. 
* net_growth * net_backoff))
            else:
                self.assertTrue(scaler.get_scale() == 1.0)

            for c, s in zip(mod_control.parameters(), mod_scaling.parameters()):
                self.assertEqual(c.grad, s.grad, atol=atol, rtol=1e-05)

                c_state, s_state = opt_control.state[c], opt_scaling.state[s]
                for k in c_state:
                    self.assertEqual(c_state[k], s_state[k], atol=atol, rtol=1e-05, msg=k)

                self.assertEqual(c, s, atol=atol, rtol=1e-05)

    @onlyNativeDeviceTypes
    @parametrize("foreach, fused", [(None, None), (True, None), (None, True)])
    @optims(
        [optim for optim in optim_db if optim.optim_cls in [torch.optim.AdamW, torch.optim.Adam, torch.optim.SGD]],
        dtypes=[torch.float32]
    )
    def test_grad_scaling_autocast(self, device, dtype, optim_info, foreach, fused):
        try_pickle = False

        def run(device, data, model, optimizer, scaler, loss_fn, skip_iter, try_scaling_api):
            for i, (input, target) in enumerate(data):
                optimizer.zero_grad()
                with torch.autocast(device_type=device, dtype=torch.half, enabled=try_scaling_api):
                    output = model(input)
                    loss = loss_fn(output, target)
                if try_scaling_api:
                    scaler.scale(loss).backward()
                    if i == skip_iter and scaler.is_enabled():
                        with torch.no_grad():
                            model[1].weight.grad.fill_(float('inf'))
                    scaler.step(optimizer)
                    scaler.update()
                    if try_pickle:
                        scaler = pickle.loads(pickle.dumps(scaler))
                else:
                    loss.backward()
                    if (not scaler.is_enabled()) or (i != skip_iter):
                        optimizer.step()
            return scaler

        optimizer_ctor = optim_info.optim_cls

        # Compares no scaling + no autocasting against scaling + autocasting.
        # NOTE(mkozuki): With the current way of testing, `torch.optim.Adam` fails regardless of
        # `foreach` and `fused`, so this test is given some flexibility for those optimizers.
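        # (A worked example of the expected-scale bookkeeping _run_scaling_case checks:
        # with growth_interval=1, init_scale=128., growth_factor=2.0 and the default
        # backoff_factor=0.5, unskipped=3 and skipped=1 give 128. * 2.0**3 * 0.5**1 == 512.)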
        context = contextlib.nullcontext
        if optimizer_ctor in (torch.optim.Adam, torch.optim.AdamW):
            context = partial(self.assertRaises, AssertionError)
        with context():
            # sets atol=1e-3 because we're comparing pure fp32 arithmetic vs a mixture of fp16 and fp32
            self._run_scaling_case(
                device, run, unskipped=3, skipped=1, atol=1e-3,
                optimizer_ctor=optimizer_ctor, optimizer_kwargs={"foreach": foreach, "fused": fused},
            )
            # this will be picked up by try_pickle within run():
            try_pickle = True
            self._run_scaling_case(
                device, run, unskipped=3, skipped=1, atol=1e-3,
                optimizer_ctor=optimizer_ctor, optimizer_kwargs={"foreach": foreach, "fused": fused},
            )

    # Make sure that the parameters become nonsense when the scaled gradients are finite
    # but get invalidated between `GradScaler.unscale_` and `optimizer.step`.

    @onlyNativeDeviceTypes
    @optims(
        [optim for optim in optim_db if optim.optim_cls in [torch.optim.AdamW, torch.optim.Adam, torch.optim.SGD]],
        dtypes=[torch.float32]
    )
    def test_params_invalidated_with_grads_invalidated_between_unscale_and_step(self, device, dtype, optim_info):
        optimizer_ctor = optim_info.optim_cls
        all_optim_inputs = _get_optim_inputs_including_global_cliquey_kwargs(
            device, dtype, optim_info, skip=("differentiable",))

        for optim_input in all_optim_inputs:
            model, _, optimizer, _, data, loss_fn, _ = _create_scaling_case(
                device, optimizer_ctor=optimizer_ctor, optimizer_kwargs=optim_input.kwargs,
            )
            scaler = torch.GradScaler(device=device, init_scale=128.0)

            for input, target in data:
                optimizer.zero_grad()
                with torch.autocast(device_type=device, dtype=torch.half):
                    output = model(input)
                    loss = loss_fn(output, target)
                scaler.scale(loss).backward()
                scaler.unscale_(optimizer)

                # deliberately break grads
                for j, param in enumerate(model.parameters()):
                    param.grad.copy_(torch.inf if j % 2 else torch.nan)

                scaler.step(optimizer)
                scaler.update()

            self.assertTrue(all((p.isnan().any() or p.isinf().any()) for p in model.parameters()))

    @onlyNativeDeviceTypes
    def test_grad_scale_will_not_overflow(self, device):
        device = torch.device(device)
        model = torch.nn.Linear(5, 1).to(device)
        optimizer = torch.optim.Adam(model.parameters())
        scaler = torch.GradScaler(device=device.type, growth_interval=1, growth_factor=2**4, init_scale=1e38)
        optimizer.zero_grad()
        x = torch.randn(1, 5).to(device)
        y = 1e-30 * torch.randn(1, 1).to(device)
        loss = ((model(x) - y) ** 2).mean()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        # NB: comparing against float("nan") is always unequal, so check non-finiteness explicitly.
        assert not scaler._scale.isinf() and not scaler._scale.isnan()

    @onlyNativeDeviceTypes
    def test_grad_scaling_clipping(self, device):
        device = torch.device(device)

        def run(device, data, model, optimizer, scaler, loss_fn, skip_iter, try_scaling_api):
            max_norm = 0.2  # A reasonable value that actually has an effect, based on printouts of grads
            for i, (input, target) in enumerate(data):
                optimizer.zero_grad()
                output = model(input)
                loss = loss_fn(output, target)
                if try_scaling_api:
                    scaler.scale(loss).backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm * scaler.get_scale())
                    if i == skip_iter and scaler.is_enabled():
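                        # (Poisoning one grad with inf makes the scaler skip this
                        # optimizer.step(), exercising the skipped-iteration accounting
                        # that _run_scaling_case verifies with skipped=1.)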
model[1].weight.grad.data.fill_(float('inf')) 6006 scaler.step(optimizer) 6007 scaler.update() 6008 else: 6009 loss.backward() 6010 torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm) 6011 if (not scaler.is_enabled()) or (i != skip_iter): 6012 optimizer.step() 6013 6014 self._run_scaling_case(device.type, run, unskipped=3, skipped=1, atol=1e-5) 6015 6016 @onlyNativeDeviceTypes 6017 def test_grad_scaling_clipping_separate_unscale(self, device): 6018 device = torch.device(device) 6019 6020 def run(device, data, model, optimizer, scaler, loss_fn, skip_iter, try_scaling_api): 6021 max_norm = 0.2 # A reasonable value that actually has an effect, based on printouts of grads 6022 for i, (input, target) in enumerate(data): 6023 optimizer.zero_grad() 6024 output = model(input) 6025 loss = loss_fn(output, target) 6026 if try_scaling_api: 6027 scaler.scale(loss).backward() 6028 if i == skip_iter and scaler.is_enabled(): 6029 model[1].weight.grad.data.fill_(float('inf')) 6030 scaler.unscale_(optimizer) 6031 torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm, error_if_nonfinite=False) 6032 scaler.step(optimizer) 6033 scaler.update() 6034 else: 6035 loss.backward() 6036 torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm) 6037 if (not scaler.is_enabled()) or (i != skip_iter): 6038 optimizer.step() 6039 6040 self._run_scaling_case(device.type, run, unskipped=3, skipped=1) 6041 6042 @onlyNativeDeviceTypes 6043 @unittest.skipIf(IS_WINDOWS, 'FIXME: fix this test for Windows') 6044 def test_grad_scaling_penalty(self, device): 6045 device = torch.device(device) 6046 6047 def run(device, data, model, optimizer, scaler, loss_fn, skip_iter, try_scaling_api): 6048 for i, (input, target) in enumerate(data): 6049 optimizer.zero_grad() 6050 output = model(input) 6051 loss = loss_fn(output, target) 6052 6053 if try_scaling_api: 6054 grad_params = torch.autograd.grad(scaler.scale(loss), 6055 model.parameters(), create_graph=True) 6056 inv_scale = 1. 
/ scaler.get_scale() 6057 grad_params = [p * inv_scale for p in grad_params] 6058 else: 6059 grad_params = torch.autograd.grad(loss, model.parameters(), create_graph=True) 6060 6061 grad_norm = 0 6062 for grad in grad_params: 6063 grad_norm += grad.pow(2).sum() 6064 grad_norm = grad_norm.sqrt() 6065 loss = loss + grad_norm 6066 6067 if try_scaling_api: 6068 scaler.scale(loss).backward() 6069 if i == skip_iter and scaler.is_enabled(): 6070 model[1].weight.grad.data.fill_(float('inf')) 6071 scaler.step(optimizer) 6072 scaler.update() 6073 else: 6074 loss.backward() 6075 if (not scaler.is_enabled()) or (i != skip_iter): 6076 optimizer.step() 6077 6078 self._run_scaling_case(device.type, run, unskipped=3, skipped=1) 6079 6080 @onlyNativeDeviceTypes 6081 def test_grad_scaling_accumulation(self, device): 6082 device = torch.device(device) 6083 6084 def run(device, data, model, optimizer, scaler, loss_fn, skip_iter, try_scaling_api): 6085 iters_to_accumulate = 2 6086 for i, (input, target) in enumerate(data): 6087 output = model(input) 6088 loss = loss_fn(output, target) 6089 loss = loss / iters_to_accumulate 6090 if try_scaling_api: 6091 scaler.scale(loss).backward() 6092 else: 6093 loss.backward() 6094 if (i + 1) % iters_to_accumulate == 0: 6095 if try_scaling_api: 6096 scaler.step(optimizer) 6097 scaler.update() 6098 optimizer.zero_grad() 6099 else: 6100 optimizer.step() 6101 optimizer.zero_grad() 6102 6103 self._run_scaling_case(device.type, run, unskipped=2, skipped=0) 6104 6105 @onlyNativeDeviceTypes 6106 def test_grad_scaling_multiple(self, device): 6107 device = torch.device(device) 6108 # Tests gradient scaling with 2 models and 2 optimizers that both receive gradients from 2 losses. 6109 # Some of the logic here cannot reuse the generic helper functions created for the 1-optimizer cases. 6110 for enabled in True, False: 6111 mod_control0, mod_scaling0, opt_control0, opt_scaling0, data, loss_fn, skip_iter = \ 6112 _create_scaling_case(device.type) 6113 mod_control1, mod_scaling1, opt_control1, opt_scaling1 = \ 6114 _create_scaling_models_optimizers(device.type) 6115 6116 GradScaler = partial(torch.GradScaler, device=device.type) 6117 scaler = GradScaler(init_scale=128., growth_factor=2.0, enabled=enabled, growth_interval=1) 6118 6119 def run(model0, model1, optimizer0, optimizer1, try_scaling_api): 6120 for i, (input, target) in enumerate(data): 6121 optimizer0.zero_grad() 6122 optimizer1.zero_grad() 6123 output0 = model0(input) 6124 output1 = model1(input) 6125 loss0 = loss_fn(0.3 * output0 + 0.7 * output1, target) 6126 loss1 = loss_fn(0.6 * output0 - 0.4 * output1, target) 6127 6128 if try_scaling_api: 6129 scaler.scale(loss0).backward(retain_graph=True) 6130 scaler.scale(loss1).backward() 6131 if i == skip_iter and scaler.is_enabled(): 6132 model1[1].weight.grad.data.fill_(float('inf')) 6133 6134 # As an additional stress test, separately unscale for one of the optimizers. 6135 scaler.unscale_(optimizer0) 6136 6137 scaler.step(optimizer0) 6138 scaler.step(optimizer1) 6139 scaler.update() 6140 else: 6141 loss0.backward(retain_graph=True) 6142 loss1.backward() 6143 optimizer0.step() 6144 if (not scaler.is_enabled()) or (i != skip_iter): 6145 optimizer1.step() 6146 6147 run(mod_control0, mod_control1, opt_control0, opt_control1, False) 6148 run(mod_scaling0, mod_scaling1, opt_scaling0, opt_scaling1, True) 6149 6150 # The loss scale should have been multiplied by the growth factor 3 times and the backoff factor once. 6151 self.assertTrue(scaler.get_scale() == (128. 
* scaler.get_growth_factor()**3 *
                            scaler.get_backoff_factor()**1) if enabled
                            else scaler.get_scale() == 1.0)  # a disabled scaler keeps scale == 1.0

            for c, s in zip(chain(mod_control0.parameters(), mod_control1.parameters()),
                            chain(mod_scaling0.parameters(), mod_scaling1.parameters())):
                self.assertEqual(c, s, rtol=1e-5, atol=1e-7)

    @onlyNativeDeviceTypes
    def test_grad_scaler_pass_itself(self, device):
        device = torch.device(device)
        GradScaler = partial(torch.amp.GradScaler, device=device.type)

        class _PlaceHolderOptimizer(torch.optim.Optimizer):
            tester = self

            def __init__(self, params, defaults=None):
                if defaults is None:
                    defaults = {}
                super().__init__(params, defaults)
                self._step_supports_amp_scaling = True

        class Optimizer1(_PlaceHolderOptimizer):
            def step(self, closure=None, *, grad_scaler=None):
                self.tester.assertTrue(isinstance(grad_scaler, torch.amp.GradScaler))
                self.tester.assertFalse(hasattr(self, "grad_scale"))
                self.tester.assertFalse(hasattr(self, "found_inf"))

        class Optimizer2(_PlaceHolderOptimizer):
            def step(self, closure=None):
                self.tester.assertTrue(isinstance(self.grad_scale, torch.Tensor))
                self.tester.assertTrue(isinstance(self.found_inf, torch.Tensor))

        x = torch.randn(4, 4).to(device)
        m = torch.nn.Linear(4, 1).to(device)
        o1 = Optimizer1(m.parameters())
        o2 = Optimizer2(m.parameters())
        scaler = GradScaler(init_scale=2.0)

        with torch.autocast(device_type=device.type, dtype=torch.half):
            y = m(x)
            loss = y.mean()
        scaler.scale(loss).backward()
        with self.assertWarns(FutureWarning):
            scaler.step(o1)
        scaler.step(o2)
        scaler.update()

    @onlyNativeDeviceTypes
    def test_grad_scaler_deprecated_warning(self, device):
        device = torch.device(device)
        GradScaler = torch.cuda.amp.GradScaler if "cuda" == device.type else torch.cpu.amp.GradScaler

        with self.assertWarnsRegex(
            FutureWarning,
            rf"`torch.{device.type}.amp.GradScaler\(args...\)` is deprecated.",
        ):
            _ = GradScaler(init_scale=2.0)

    @dtypesIfCUDA(torch.float, torch.double, torch.half)
    @dtypesIfCPU(torch.float, torch.double, torch.bfloat16, torch.half)
    @dtypes(torch.float, torch.double)
    def test_multinomial_cpu(self, device, dtype):
        def make_prob_dist(shape, is_contiguous):
            if is_contiguous:
                if dtype == torch.half or dtype == torch.bfloat16:
                    return torch.zeros(shape, device=device).uniform_().to(dtype=dtype)
                return torch.zeros(shape, device=device, dtype=dtype).uniform_()
            elif len(shape) == 1:
                if dtype == torch.half or dtype == torch.bfloat16:
                    return torch.zeros((shape + [5]), device=device).uniform_().to(dtype=dtype)[:, 2]
                return torch.zeros((shape + [5]), device=device, dtype=dtype).uniform_()[:, 2]
            else:
                # num dim = 2
                new_shape = [2, shape[1], 7, 1, shape[0], 1, 10]
                if dtype == torch.half or dtype == torch.bfloat16:
                    prob_dist = torch.zeros(new_shape, device=device).uniform_().to(dtype=dtype)
                else:
                    prob_dist = torch.zeros(new_shape, device=device, dtype=dtype).uniform_()
                prob_dist = prob_dist.transpose(1, 4)
                prob_dist = prob_dist[1, :, 5, 0, :, 0, 4]
                assert not prob_dist.is_contiguous()  # sanity check
                return prob_dist

    # FIXME: move to elementwise ternary test suite
    # Restricted to native device types because on XLA the expected RuntimeError is not raised.
    @onlyNativeDeviceTypes
    def
test_where_scalar_handcrafted_values(self, device): 6238 # Tests ScalarxScalar, ScalarxTensor and TensorxScalar 6239 # variant of `where` against NumPy version with 6240 # handcrafted values. 6241 condition_shape = (5, 5) 6242 dtypes = ( 6243 torch.bool, torch.uint8, torch.int8, torch.int16, torch.int64, 6244 torch.float16, torch.float32, torch.float64, 6245 torch.complex64, torch.complex128, 6246 ) 6247 shapes = ((), (5,), (1, 5),) 6248 6249 with torch.no_grad(): 6250 tensors = (torch.empty(shape, dtype=dtype, device=device).fill_(17) 6251 for shape, dtype in product(shapes, dtypes)) 6252 6253 # Use different values for `x` and `y` 6254 # as they are the output values which are compared. 6255 x_vals = (True, 3, 7.0, 1 + 0.5j) 6256 y_vals = itertools.chain((False, 4, 8.0, 2 + 0.5j), tensors) 6257 for x in x_vals: 6258 for y in y_vals: 6259 condition = torch.empty(*condition_shape, dtype=torch.bool, device=device).bernoulli_() 6260 common_dtype = torch.result_type(x, y) 6261 6262 def check_equal(condition, x, y): 6263 condition_np = condition.cpu().numpy() 6264 x_np = x.cpu().numpy() if isinstance(x, torch.Tensor) else x 6265 y_np = y.cpu().numpy() if isinstance(y, torch.Tensor) else y 6266 6267 # NumPy aggressively promotes to double, hence cast to output to correct dtype 6268 expected = torch.from_numpy(np.where(condition_np, x_np, y_np)).to(common_dtype) 6269 result = torch.where(condition, x, y) 6270 self.assertEqual(expected, result) 6271 6272 check_equal(condition, x, y) 6273 check_equal(condition, y, x) 6274 if self.device_type == "cuda": 6275 check_equal(condition, torch.tensor(x), y) 6276 check_equal(condition, y, torch.tensor(x)) 6277 if not isinstance(y, torch.Tensor): 6278 check_equal(condition, torch.tensor(y), torch.tensor(x)) 6279 if isinstance(y, torch.Tensor) and y.ndim > 0: 6280 check_equal(torch.tensor(True), x, y) 6281 check_equal(torch.tensor(True), y, x) 6282 6283 6284 @skipIfTorchInductor("FIXME") 6285 def test_hook_remove(self, device): 6286 # Reference: https://github.com/pytorch/pytorch/issues/58354 6287 def _test_helper(remove_hook): 6288 def install_hook(tensor): 6289 handle = None 6290 6291 def hook(tensor): 6292 if remove_hook: 6293 handle.remove() 6294 return torch.zeros_like(tensor) 6295 handle = tensor.register_hook(hook) 6296 6297 t = torch.ones((1, 5), device=device, requires_grad=True) 6298 install_hook(t) 6299 6300 # First call to backward 6301 t.mean().backward() 6302 self.assertEqual(t.grad, torch.zeros_like(t)) 6303 6304 # Second call to backward 6305 t.mean().backward() 6306 if remove_hook: 6307 # After removing the hook, make sure the usual gradient is returned 6308 self.assertEqual(t.grad, 0.2 * torch.ones_like(t)) 6309 else: 6310 self.assertEqual(t.grad, torch.zeros_like(t)) 6311 6312 _test_helper(remove_hook=True) 6313 _test_helper(remove_hook=False) 6314 6315 # FIXME: get PyTorch/XLA to run test_testing 6316 # This test should ideally be in test_testing.py, 6317 # but since pytorch/xla runs tests from test_torch.py, we have it here. 6318 @skipXLA 6319 def test_skip_xla(self, device): 6320 if self.device_type == 'xla': 6321 # Should not reach here! 6322 self.assertTrue(False) 6323 6324 # FIXME: get PyTorch/XLA to run test_testing 6325 # This test should ideally be in test_testing.py, 6326 # but since pytorch/xla runs tests from test_torch.py, we have it here. 
6327 @expectedFailureXLA 6328 def test_expected_failure_xla(self, device): 6329 if self.device_type == 'xla': 6330 self.assertTrue(False) 6331 6332 # FIXME: get PyTorch/XLA to run test_testing 6333 # This test should ideally be in test_testing.py, 6334 # but since pytorch/xla runs tests from test_torch.py, we have it here. 6335 def test_assertRaisesRegex_ignore_msg_non_native_device(self, device): 6336 # Verify that self.assertRaisesRegex only checks the Error and ignores 6337 # message for non-native devices. 6338 x = torch.randn((10, 3), device=device) 6339 t = torch.empty(10, dtype=torch.int64, device=device).random_(0, 3) 6340 invalid_weight = torch.randn(4, device=device) 6341 msg = "weight tensor should be defined either for all 3 classes or no classes" 6342 6343 # XLA raises RuntimeError with a different message. 6344 with self.assertRaisesRegex(RuntimeError, msg): 6345 torch.nn.functional.nll_loss(x, t, weight=invalid_weight) 6346 6347 @dtypes(*all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.complex32)) 6348 def test_copy_(self, device, dtype): 6349 def can_cast(src_dtype, dst_dtype): 6350 # torch.can_cast(torch.int16, torch.uint8) returns True 6351 # which isn't actually safe-cast. 6352 # This function returns False in this case. 6353 def is_unsigned_int(dtype): 6354 return dtype is torch.uint8 6355 6356 if is_unsigned_int(dst_dtype): 6357 return is_unsigned_int(src_dtype) 6358 return torch.can_cast(src_dtype, dst_dtype) 6359 6360 def make_tensor_wrapper(shape, dtype): 6361 if dtype is not torch.complex32: 6362 # Make tensor does not support generating 6363 # complex32 tensor 6364 return make_tensor(shape, device=device, dtype=dtype) 6365 return torch.randn(shape, device=device, dtype=dtype) 6366 6367 t = make_tensor_wrapper((50,), dtype) 6368 src_dtypes = all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.complex32) 6369 for src_dtype in src_dtypes: 6370 src = make_tensor_wrapper((50,), dtype=src_dtype) 6371 t.copy_(src) 6372 dst = make_tensor_wrapper((50, ), dtype=src_dtype) 6373 if can_cast(src_dtype, dtype): 6374 rtol = None 6375 atol = None 6376 if dtype in (torch.half, torch.complex32): 6377 rtol = 1e-3 6378 atol = 1e-3 6379 if dtype in (torch.bfloat16,): 6380 rtol = 1e-2 6381 atol = 1e-2 6382 self.assertEqual(src, dst.copy_(t), rtol=rtol, atol=atol) 6383 6384 @dtypes(*all_types_and_complex_and( 6385 torch.bool, torch.half, torch.bfloat16, torch.complex32, 6386 torch.uint16, torch.uint32, torch.uint64)) 6387 def test_item(self, device, dtype): 6388 if torch.device(device).type == 'xla' and dtype in [torch.uint16, torch.uint32, torch.uint64]: 6389 self.skipTest('uint16,32,64 not implemented on XLA') 6390 t = torch.ones((), device=device, dtype=dtype) 6391 self.assertEqual(1, t.item()) 6392 6393 @onlyNativeDeviceTypes 6394 def test_masked_scatter_inplace_noncontiguous(self, device): 6395 t = torch.zeros(5, 2, dtype=torch.long, device=device) 6396 t_non_contig = t.transpose(0, 1) 6397 t_contig = t_non_contig.contiguous() 6398 6399 assert t_contig.is_contiguous() 6400 assert not t_non_contig.is_contiguous() 6401 6402 mask = torch.tensor([[False, True], [False, True], [False, False], [True, True], [True, True]], device=device) 6403 mask_non_contig = mask.transpose(0, 1) 6404 mask_contig = mask_non_contig.contiguous() 6405 6406 assert mask_contig.is_contiguous() 6407 assert not mask_non_contig.is_contiguous() 6408 6409 # source is always converted to contiguous by the op. 
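        # (masked_scatter_ consumes source elements in row-major order, one per True
        # entry of the mask; the mask below has 6 True entries, so only the first 6
        # of the 10 source values are written.)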
6410 source = torch.tensor([[1, 2, 3, 4, 5], [6, 7, 8, 9, 9]], device=device) 6411 6412 # t: contig, mask: contig 6413 expected = t_contig.masked_scatter_(mask_contig, source) 6414 6415 # t: non-contig, mask: non-contig 6416 actual = t_non_contig.masked_scatter_(mask_non_contig, source) 6417 self.assertEqual(actual, expected) 6418 6419 # t: contig, mask: non-contig 6420 actual = t_contig.masked_scatter_(mask_non_contig, source) 6421 self.assertEqual(actual, expected) 6422 6423 # t: non-contig, mask: contig 6424 actual = t_non_contig.masked_scatter_(mask_contig, source) 6425 self.assertEqual(actual, expected) 6426 6427 6428# Tests that compare a device's computation with the (gold-standard) CPU's. 6429class TestDevicePrecision(TestCase): 6430 exact_dtype = True 6431 6432 # FIXME: move to indexing test suite 6433 @onlyCUDA 6434 def test_index_add_bfloat16(self, device): 6435 inp_tensor = torch.randn(5, 3, device='cpu').bfloat16() 6436 t = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=torch.bfloat16, device='cpu') 6437 index = torch.tensor([0, 4, 2], device='cpu') 6438 out_cpu = inp_tensor.index_add(0, index, t) 6439 6440 inp_tensor = inp_tensor.to(device=device) 6441 t = t.to(device=device) 6442 index = index.to(device=device) 6443 out_gpu = inp_tensor.index_add(0, index, t) 6444 6445 self.assertEqual(out_cpu, out_gpu, atol=1e-2, rtol=0) 6446 6447 # FIXME: move to serialization test suite 6448 def test_device_serialization(self, device): 6449 x = torch.randn(4, 4, device=device) 6450 6451 with tempfile.NamedTemporaryFile() as f: 6452 torch.save(x, f) 6453 f.seek(0) 6454 x_copy = torch.load(f) 6455 6456 self.assertEqual(x_copy, x) 6457 self.assertIs(type(x_copy), type(x)) 6458 self.assertEqual(x_copy.device, x.device) 6459 6460 # FIXME: move to serialization test suite 6461 @deviceCountAtLeast(2) 6462 def test_multidevice_serialization(self, devices): 6463 x = [torch.randn(4, 4, device=devices[0]), 6464 torch.randn(4, 4, device=devices[1])] 6465 6466 with tempfile.NamedTemporaryFile() as f: 6467 torch.save(x, f) 6468 f.seek(0) 6469 x_copy = torch.load(f) 6470 6471 for original, cp in zip(x, x_copy): 6472 self.assertEqual(cp, original) 6473 self.assertIs(type(cp), type(original)) 6474 self.assertEqual(cp.device, original.device) 6475 6476 # FIXME: move to data movement test suite 6477 @deviceCountAtLeast(1) 6478 def test_copy_noncontig(self, devices): 6479 def do_test(d0, d1): 6480 x = torch.tensor([1.5, 2.5, 3.5, 4.5, 5.5, 6.5], device=d0) 6481 y = torch.tensor([0, 0, 0, 0, 0, 0], device=d1) 6482 self.assertNotEqual(x.dtype, y.dtype) 6483 6484 y[::2].copy_(x[::2]) 6485 self.assertEqual(y, [1, 0, 3, 0, 5, 0]) 6486 6487 do_test('cpu', devices[0]) 6488 do_test(devices[0], 'cpu') 6489 6490 if len(devices) > 1: 6491 do_test(devices[0], devices[1]) 6492 6493 @deviceCountAtLeast(2) 6494 def test_type_conversions_same_device(self, devices): 6495 x = torch.randn(5, 5, device=devices[1]) 6496 self.assertEqual(x.int().device, torch.device(devices[1])) 6497 self.assertEqual(x.type(torch.int).device, torch.device(devices[1])) 6498 self.assertEqual(x.to(torch.int).device, torch.device(devices[1])) 6499 6500 @dtypesIfCUDA(torch.half, torch.float, torch.double, 6501 torch.int8, torch.short, torch.int, torch.long, 6502 torch.uint8) 6503 @dtypes(torch.float, torch.double, 6504 torch.int8, torch.short, torch.int, torch.long, 6505 torch.uint8) 6506 def test_from_sequence(self, device, dtype): 6507 seq = [list(range(i * 4, i * 4 + 4)) for i in range(5)] 6508 reference = torch.arange(0, 20).resize_(5, 4) 
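        # (resize_(5, 4) lays the flat 0..19 range out in the same 5x4 shape that the
        # nested list `seq` describes, so the constructed tensor should match it.)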
6509 self.assertEqual(torch.tensor(seq, dtype=dtype, device=device), reference, exact_dtype=False) 6510 6511 # FIXME: moved to indexing test suite 6512 @deviceCountAtLeast(1) 6513 def test_advancedindex_mixed_cpu_devices(self, devices) -> None: 6514 def test(x: torch.Tensor, ia: torch.Tensor, ib: torch.Tensor) -> None: 6515 # test getitem 6516 self.assertEqual(x[:, ia, None, ib, 0].cpu(), 6517 x.cpu()[:, ia.cpu(), None, ib.cpu(), 0]) 6518 self.assertEqual(x[ia], x.cpu()[ia.cpu()]) 6519 # test setitem 6520 x_clone1 = x.clone() 6521 x_clone2 = x.clone() 6522 first_shape = x[:, ia, None, ib, 0].shape 6523 second_shape = x[ia].shape 6524 x_clone1[:, ia, None, ib, 0] = torch.randn(first_shape).to(x_clone1) 6525 x_clone2[ia] = torch.randn(second_shape).to(x_clone2) 6526 6527 cpu = torch.device('cpu') 6528 for device in devices: 6529 x = torch.randn(3, 4, 4, 4, 3) 6530 ia = torch.tensor([0, 2, 1]) 6531 ib = torch.tensor([0, 2, 1]) 6532 6533 # Index device tensor with cpu tensor 6534 x = x.to(device) 6535 ia = ia.to(cpu) 6536 ib = ib.to(cpu) 6537 test(x, ia, ib) 6538 6539 # Index device tensor with mixed cpu, device tensors 6540 x = x.to(device) 6541 ia = ia.to(cpu) 6542 ib = ib.to(device) 6543 test(x, ia, ib) 6544 6545 @deviceCountAtLeast(1) 6546 def test_advancedindex_mixed_devices_error(self, devices) -> None: 6547 def test(x: torch.Tensor, ia: torch.Tensor, ib: torch.Tensor) -> None: 6548 # test getitem 6549 with self.assertRaisesRegex(RuntimeError, fr"indices should be either .* \({x.device}\)"): 6550 value = x[:, ia, None, ib, 0] 6551 with self.assertRaisesRegex(RuntimeError, fr"indices should be either .* \({x.device}\)"): 6552 value = x[ib] 6553 6554 cpu = torch.device('cpu') 6555 for device in devices: 6556 # Index cpu tensor with device tensor 6557 x = torch.randn(3, 4, 4, 4, 3) 6558 ia = torch.tensor([0, 2, 1]).to(device) 6559 ib = torch.tensor([0, 2, 1]).to(device) 6560 test(x, ia, ib) 6561 6562 # Index cpu tensor with mixed cpu, device tensors 6563 x = x.to(cpu) 6564 ia = ia.to(cpu) 6565 ib = ib.to(device) 6566 test(x, ia, ib) 6567 6568 if len(devices) > 1: 6569 other_device = devices[0] if device == devices[1] else devices[1] 6570 6571 # Index device tensor with mixed cpu, device tensors on different devices 6572 x = x.to(device) 6573 ia = ia.to(cpu) 6574 ib = ib.to(other_device) 6575 test(x, ia, ib) 6576 6577 # FIXME: move to data movement test suite 6578 def test_copy_broadcast(self, device) -> None: 6579 x = torch.randn(10, 5) 6580 y = torch.randn(5, device=device) 6581 x.copy_(y) 6582 self.assertEqual(x[3], y) 6583 6584 x = torch.randn(10, 5, device=device) 6585 y = torch.randn(5) 6586 x.copy_(y) 6587 self.assertEqual(x[3], y) 6588 6589 # FIXME: move to an elementwise ternary test suite 6590 @dtypes(torch.int64, torch.float32, torch.float64) 6591 def test_clamp(self, device, dtype): 6592 test_args = [ 6593 *product( 6594 [(100, 50), (10, 64), (97,)], # shape 6595 (True, False), # non-contiguous 6596 ) 6597 ] 6598 6599 for shape, noncontig in test_args: 6600 x = make_tensor(shape, device=device, dtype=dtype, 6601 noncontiguous=noncontig) 6602 ub = make_tensor(shape, device=device, dtype=dtype, 6603 noncontiguous=noncontig) 6604 lb = make_tensor(shape, device=device, dtype=dtype, 6605 noncontiguous=noncontig) 6606 6607 expect = x.max(lb).min(ub) 6608 actual = x.clamp(lb, ub) 6609 self.assertEqual(expect, actual) 6610 6611 expect = np.clip(x.cpu().numpy(), lb.cpu().numpy(), ub.cpu().numpy()) 6612 self.assertEqual(expect, actual) 6613 6614 expect = x.max(lb) 6615 actual = 
x.clamp(min=lb) 6616 self.assertEqual(expect, actual) 6617 6618 expect = x.min(ub) 6619 actual = x.clamp(max=ub) 6620 self.assertEqual(expect, actual) 6621 6622 # Test broadcasting min & max 6623 expect = x.max(lb[0]).min(ub[..., :1]) 6624 actual = x.clamp(lb[0], ub[..., :1]) 6625 self.assertEqual(expect, actual) 6626 6627 # Test broadcasting x 6628 expect = x[..., :1].max(lb).min(ub) 6629 actual = x[..., :1].clamp(lb, ub) 6630 self.assertEqual(expect, actual) 6631 6632 def test_cuda_device_idx(self, device): 6633 x = torch.zeros(3, device=device) 6634 y = torch._efficientzerotensor(3, device=device) 6635 self.assertEqual(x.device, y.device) 6636 6637# we implemented custom deallocation for subclasses, so it behooves 6638# us to make sure all of these bits work. We'll use __del__ to 6639# track if objects die or not 6640class Tracker: 6641 def __init__(self, marker): 6642 self.marker = marker 6643 6644 @staticmethod 6645 def make(): 6646 marker = [False] 6647 return marker, Tracker(marker) 6648 6649 def __del__(self): 6650 self.marker[0] = True 6651 6652@contextlib.contextmanager 6653def disable_gc(): 6654 if gc.isenabled(): 6655 try: 6656 gc.disable() 6657 yield 6658 finally: 6659 gc.enable() 6660 else: 6661 yield 6662 6663class TestTorch(TestCase): 6664 exact_dtype = True 6665 6666 def test_dir(self): 6667 dir(torch) 6668 6669 def test_wildcard_import(self): 6670 exec('from torch import *') 6671 6672 def test_newaxis_numpy_comparison(self): 6673 def run_test(tensor, *idx): 6674 npt = tensor.numpy() 6675 self.assertEqual(tensor[idx], npt[idx]) 6676 6677 # 1D Tensor Tests 6678 x = torch.arange(0, 10) 6679 cases = [ 6680 [None], 6681 [None, None], 6682 [Ellipsis, None], 6683 [None, Ellipsis], 6684 [2, None], 6685 [None, 2], 6686 [Ellipsis, None, 2], 6687 [Ellipsis, 2, None], 6688 [2, Ellipsis, None], 6689 [2, None, Ellipsis], 6690 [None, 2, Ellipsis], 6691 [None, Ellipsis, 2], 6692 ] 6693 6694 for case in cases: 6695 run_test(x, *case) 6696 6697 # 2D Tensor Tests 6698 x = torch.arange(0, 12).view(3, 4) 6699 cases = [ 6700 [None], 6701 [None, None], 6702 [None, None, None], 6703 [Ellipsis, None], 6704 [Ellipsis, None, None], 6705 [None, Ellipsis], 6706 [None, Ellipsis, None], 6707 [None, None, Ellipsis], 6708 [2, None], 6709 [2, None, Ellipsis], 6710 [2, Ellipsis, None], 6711 [None, 2, Ellipsis], 6712 [Ellipsis, 2, None], 6713 [Ellipsis, None, 2], 6714 [None, Ellipsis, 2], 6715 [1, 2, None], 6716 [1, 2, Ellipsis, None], 6717 [1, Ellipsis, 2, None], 6718 [Ellipsis, 1, None, 2], 6719 [Ellipsis, 1, 2, None], 6720 [1, None, 2, Ellipsis], 6721 [None, 1, Ellipsis, 2], 6722 [None, 1, 2, Ellipsis], 6723 ] 6724 6725 for case in cases: 6726 run_test(x, *case) 6727 6728 def _consecutive(self, size, start=1): 6729 sequence = torch.ones(torch.tensor(size).prod(0)).cumsum(0) 6730 sequence.add_(start - 1) 6731 return sequence.resize_(*size) 6732 6733 def test_newindex(self): 6734 reference = self._consecutive((3, 3, 3)) 6735 # This relies on __index__() being correct - but we have separate tests for that 6736 6737 def checkPartialAssign(index): 6738 reference = torch.zeros(3, 3, 3) 6739 reference[index] = self._consecutive((3, 3, 3))[index] 6740 self.assertEqual(reference[index], self._consecutive((3, 3, 3))[index], atol=0, rtol=0) 6741 reference[index] = 0 6742 self.assertEqual(reference, torch.zeros(3, 3, 3), atol=0, rtol=0) 6743 6744 checkPartialAssign(0) 6745 checkPartialAssign(1) 6746 checkPartialAssign(2) 6747 checkPartialAssign((0, 1)) 6748 checkPartialAssign((1, 2)) 6749 checkPartialAssign((0, 2)) 
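        # (Note the distinction exercised next: the tuple (0, 2) above indexes one
        # position per dimension, while a LongTensor index performs advanced indexing,
        # selecting rows 0 and 2 along the first dimension.)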
6750 checkPartialAssign(torch.LongTensor((0, 2))) 6751 6752 with self.assertRaises(IndexError): 6753 reference[1, 1, 1, 1] = 1 6754 with self.assertRaises(IndexError): 6755 reference[1, 1, 1, (1, 1)] = 1 6756 with self.assertRaises(IndexError): 6757 reference[3, 3, 3, 3, 3, 3, 3, 3] = 1 6758 with self.assertRaises(IndexError): 6759 reference[0.0] = 1 6760 with self.assertRaises(TypeError): 6761 reference[0.0:2.0] = 1 6762 with self.assertRaises(IndexError): 6763 reference[0.0, 0.0:2.0] = 1 6764 with self.assertRaises(IndexError): 6765 reference[0.0, :, 0.0:2.0] = 1 6766 with self.assertRaises(IndexError): 6767 reference[0.0, ..., 0.0:2.0] = 1 6768 with self.assertRaises(IndexError): 6769 reference[0.0, :, 0.0] = 1 6770 6771 # Test `torch._check*` functions 6772 def test_check(self): 6773 test_cases = [ 6774 # check function, expected error 6775 (torch._check, RuntimeError), 6776 (torch._check_index, IndexError), 6777 (torch._check_value, ValueError), 6778 (torch._check_type, TypeError), 6779 (torch._check_not_implemented, NotImplementedError), 6780 ] 6781 6782 for check_fn, expected_error in test_cases: 6783 # cond=True should not raise an error 6784 check_fn(True) 6785 6786 # Test default failure message for cond=False 6787 default_message = 'Expected cond to be True' 6788 with self.assertRaisesRegex(expected_error, default_message): 6789 check_fn(False) 6790 6791 # Test a simple failure message 6792 message = 'message' 6793 with self.assertRaisesRegex(expected_error, message): 6794 check_fn(False, lambda: message) 6795 6796 # Test message with tensor 6797 def message(): 6798 return torch.arange(4) 6799 6800 with self.assertRaisesRegex(expected_error, re.escape(str(message()))): 6801 check_fn(False, message) 6802 6803 # Test format string message 6804 def message(): 6805 return f"{'test'} {[1, 2, 'a', True]} {True} {100} {torch.arange(4)}" 6806 6807 with self.assertRaisesRegex(expected_error, re.escape(str(message()))): 6808 check_fn(False, message) 6809 6810 # Test incorrect `cond` arg type 6811 with self.assertRaisesRegex(TypeError, 'cond must be a bool'): 6812 check_fn('wrong type') 6813 6814 with self.assertRaisesRegex(TypeError, 'cond must be a bool'): 6815 check_fn(torch.tensor(True)) 6816 6817 # FIXME: move to indexing test suite 6818 def test_index_add(self): 6819 for device in get_all_device_types(): 6820 for dest_contig, src_contig, index_contig in product([True, False], repeat=3): 6821 for other_sizes in ((), (4, 5)): 6822 for dtype in [torch.int, torch.long]: 6823 num_copy, num_dest = 3, 3 6824 dest = torch.randn(num_dest, *other_sizes, device=device) 6825 if not dest_contig: 6826 dest = make_tensor(dest.shape, device=device, dtype=dest.dtype, noncontiguous=True) 6827 src = torch.randn(num_copy, *other_sizes, device=device) 6828 if not src_contig: 6829 src = noncontiguous_like(src) 6830 idx = torch.randperm(num_dest, dtype=dtype, device=device).narrow(0, 0, num_copy) 6831 if not index_contig: 6832 idx = noncontiguous_like(idx) 6833 # index_add_ without alpha argument 6834 dest2 = dest.clone() 6835 dest.index_add_(0, idx, src) 6836 for i in range(idx.size(0)): 6837 dest2[idx[i]] += src[i] 6838 self.assertEqual(dest, dest2) 6839 # index_add_ with alpha argument 6840 dest2 = dest.clone() 6841 dest.index_add_(0, idx, src, alpha=2) 6842 for i in range(idx.size(0)): 6843 dest2[idx[i]] += src[i] * 2 6844 self.assertEqual(dest, dest2) 6845 6846 # FIXME: resolve comment below and move this to indexing test suite 6847 # add coverage for issue with atomic add that appeared only for 6848 
# specific dtypes on cuda:
    # https://github.com/pytorch/pytorch/issues/29153
    def test_index_add_all_dtypes(self):
        for device in get_all_device_types():
            for dtype in get_all_math_dtypes(device):
                for idx_dtype in [torch.int, torch.long]:
                    size = [5, 5]
                    if dtype.is_floating_point or dtype.is_complex:
                        tensor = torch.rand(size, dtype=dtype, device=device)
                    elif dtype.is_signed:
                        tensor = torch.randint(-5, 15, size, dtype=dtype, device=device)
                    else:
                        tensor = torch.randint(0, 10, size, dtype=dtype, device=device)

                    # index_add calls atomicAdd on cuda.
                    zeros = torch.zeros(size, dtype=dtype, device=device)

                    added = zeros.index_add(0, torch.arange(0, size[0], dtype=idx_dtype, device=device), tensor)
                    self.assertEqual(added, tensor)

                    added = zeros.index_add(0, torch.arange(0, size[0], dtype=idx_dtype, device=device), tensor, alpha=-1)
                    self.assertEqual(added, -tensor)

    @unittest.mock.patch.object(torch._dynamo.config, "suppress_errors", False)
    @set_default_dtype(torch.double)
    def test_index_add_correctness(self):
        # Check that index_add produces the correct result when alpha is 1 and the
        # index dtype is torch.long, i.e. when it lowers to scatter_add.
        def helper(dim, dtype, device, size_result, size_source):
            tensor = torch.zeros(size_result, dtype=dtype, device=device)
            index = torch.randint(0, size_result[dim], (size_source[dim],),
                                  dtype=torch.long, device=device)
            if dtype.is_floating_point or dtype.is_complex:
                source = torch.rand(size_source, dtype=dtype, device=device)
            elif dtype.is_signed:
                source = torch.randint(-2, 5, size_source, dtype=dtype, device=device)
            else:
                source = torch.randint(0, 5, size_source, dtype=dtype, device=device)

            ref_out = tensor.index_add(dim, index, source, alpha=2.) / 2.
            ref_out = ref_out.to(dtype=dtype)
            out = tensor.index_add(dim, index, source)
            if device == 'cuda':
                self.assertEqual(out, ref_out, atol=1e-2, rtol=1e-2)
            else:
                # scatter_add uses fp32 as accumulate type, while index_add doesn't.
                self.assertEqual(out, ref_out.to(dtype=dtype), atol=1e-2, rtol=1e-2)

        for dim in [-1, -2, -3]:
            for dtype in all_types_and_complex_and(torch.half, torch.bfloat16):
                for device in get_all_device_types():
                    for size in [(2, 512, 256), (5, 256, 256)]:
                        helper(dim, dtype, device, size, size)

                    # Check bounds: an index that is too long, or contains
                    # out-of-range values, must raise.
                    result = torch.zeros(1, 512, 256, dtype=dtype)
                    source = torch.ones(1, 512, 256, dtype=dtype)
                    index = torch.ones(257).to(dtype=torch.long)
                    self.assertRaises(RuntimeError, lambda: result.index_add_(dim, index, source))
                    index = (torch.ones(256) * 257).to(dtype=torch.long)
                    self.assertRaises(RuntimeError, lambda: result.index_add_(dim, index, source))

    def test_index_add_cornercase(self):
        for device in get_all_device_types():
            dest = torch.randn((), device=device)
            index = torch.tensor([0], device=device)
            source = torch.randn(1, 1, 1, device=device)
            with self.assertRaisesRegex(
                RuntimeError,
                r"source tensor shape must match self tensor shape, excluding the specified dimension",
            ):
                dest.index_add(0, index, source)

    def test_linspace_logspace(self):
        # Ensure the output does not require grad, regardless of whether the inputs require grad.
        # The output of factory functions should not be part of any computational graph.
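        # (e.g. torch.linspace(torch.tensor(0., requires_grad=True), 3., 5) yields a
        # constant tensor with requires_grad == False; no gradient can flow back to
        # the start/end scalars.)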
6925 start = 0.0 6926 end = 3.0 6927 6928 for step in [0, 1, 2]: 6929 self.assertFalse( 6930 torch.linspace( 6931 torch.tensor(start, requires_grad=True), 6932 torch.tensor(end, requires_grad=True), step 6933 ).requires_grad 6934 ) 6935 self.assertFalse(torch.linspace(torch.tensor(start, requires_grad=True), end, step).requires_grad) 6936 self.assertFalse(torch.linspace(start, torch.tensor(end, requires_grad=True), step).requires_grad) 6937 self.assertFalse( 6938 torch.logspace( 6939 torch.tensor(start, requires_grad=True), 6940 torch.tensor(end, requires_grad=True), step 6941 ).requires_grad 6942 ) 6943 self.assertFalse(torch.logspace(torch.tensor(start, requires_grad=True), end, step).requires_grad) 6944 self.assertFalse(torch.logspace(start, torch.tensor(end, requires_grad=True), step).requires_grad) 6945 6946 # FIXME: move to shape ops test suite 6947 def test_unflatten(self): 6948 # test args: tensor, int, sizes 6949 self.assertEqual(torch.tensor([]).unflatten(0, (0, 1)), torch.empty(0, 1)) 6950 self.assertEqual(torch.tensor([1]).unflatten(0, (1, 1)), torch.tensor([[1]])) 6951 self.assertEqual(torch.tensor([1, 2, 3, 4]).unflatten(0, (2, 2)), torch.tensor([[1, 2], [3, 4]])) 6952 self.assertEqual(torch.tensor([1, 2, 3, 4]).unflatten(0, [2, 2]), torch.tensor([[1, 2], [3, 4]])) 6953 self.assertEqual(torch.tensor([1, 2, 3, 4]).unflatten(0, torch.Size([2, 2])), torch.tensor([[1, 2], [3, 4]])) 6954 self.assertEqual(torch.ones(2, 10).unflatten(1, (5, 2)), torch.ones(2, 5, 2)) 6955 self.assertEqual(torch.tensor([1, 2, 3, 4]).unflatten(0, (-1, 2)), 6956 torch.tensor([[1, 2], [3, 4]])) 6957 self.assertEqual(torch.ones(2, 10).unflatten(1, (5, -1)), 6958 torch.ones(2, 5, 2)) 6959 self.assertEqual(torch.ones(2, 10).unflatten(1, (-1,)), 6960 torch.ones(2, 10)) 6961 self.assertEqual(torch.ones(2, 3 * 4 * 5 * 6).unflatten(1, (3, 4, -1, 6)), 6962 torch.ones(2, 3, 4, 5, 6)) 6963 self.assertEqual(torch.ones(2, 0, 2).unflatten(1, (3, -1, 4, 5)), 6964 torch.ones(2, 3, 0, 4, 5, 2)) 6965 6966 # test invalid args: tensor, str, sizes 6967 with self.assertRaisesRegex(TypeError, r"unflatten\(\): argument 'dim' \(position 1\) must be int, not str"): 6968 torch.tensor([1]).unflatten('A', (1, 1)) 6969 6970 # test invalid args: tensor, str, namedshape 6971 with self.assertRaisesRegex(RuntimeError, r"Name 'A' not found in Tensor\[None\]."): 6972 torch.ones(4).unflatten('A', (('A', 2), ('B', 2))) 6973 6974 # test other invalid arguments 6975 with self.assertRaisesRegex(RuntimeError, r"sizes must be non-empty"): 6976 torch.tensor([1]).unflatten(0, []) 6977 with self.assertRaisesRegex(RuntimeError, r"Provided sizes \[2, 2\] don't multiply up to the size of dim 0 \(1\)"): 6978 torch.tensor([1]).unflatten(0, [2, 2]) 6979 with self.assertRaisesRegex(IndexError, r"Dimension specified as 0 but tensor has no dimensions"): 6980 torch.tensor(1).unflatten(0, [0]) 6981 with self.assertRaisesRegex(RuntimeError, r"only one dimension can be inferred"): 6982 torch.randn(5, 10).unflatten(1, (-1, -1)) 6983 with self.assertRaisesRegex(RuntimeError, 6984 r"Provided sizes \[-1, 4\] don't multiply up to the size of dim 1 \(10\)"): 6985 torch.randn(5, 10).unflatten(1, (-1, 4)) 6986 with self.assertRaisesRegex(RuntimeError, 6987 r"the unspecified dimension size -1 can be any value and is ambiguous"): 6988 torch.randn(2, 0).unflatten(1, (2, -1, 0)) 6989 6990 # Test that warnings generated from C++ are translated to the correct type 6991 def test_warn_types(self): 6992 test_cases = [ 6993 # function, warning type, message 6994 (torch._C._warn, 
UserWarning, r"Test message for TORCH_WARN"), 6995 (torch._C._warn_deprecation, DeprecationWarning, r"Test message for TORCH_WARN_DEPRECATION"), 6996 ] 6997 6998 for fn, warning_type, message in test_cases: 6999 with warnings.catch_warnings(record=True) as w: 7000 warnings.resetwarnings() 7001 warnings.filterwarnings('always', category=warning_type) 7002 fn() 7003 7004 self.assertEqual(len(w), 1, msg=f'{warning_type} not raised') 7005 warning = w[0].message 7006 self.assertTrue(isinstance(warning, warning_type), msg=f'{warning_type} not raised') 7007 self.assertTrue(re.search( 7008 message, 7009 str(warning))) 7010 7011 def test_structseq_repr(self): 7012 a = torch.arange(250).reshape(5, 5, 10) 7013 expected = """ 7014 torch.return_types.max( 7015 values=tensor([[ 40, 41, 42, 43, 44, 45, 46, 47, 48, 49], 7016 [ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99], 7017 [140, 141, 142, 143, 144, 145, 146, 147, 148, 149], 7018 [190, 191, 192, 193, 194, 195, 196, 197, 198, 199], 7019 [240, 241, 242, 243, 244, 245, 246, 247, 248, 249]]), 7020 indices=tensor([[4, 4, 4, 4, 4, 4, 4, 4, 4, 4], 7021 [4, 4, 4, 4, 4, 4, 4, 4, 4, 4], 7022 [4, 4, 4, 4, 4, 4, 4, 4, 4, 4], 7023 [4, 4, 4, 4, 4, 4, 4, 4, 4, 4], 7024 [4, 4, 4, 4, 4, 4, 4, 4, 4, 4]]))""" 7025 self.assertEqual(repr(a.max(1)), textwrap.dedent(expected).strip()) 7026 7027 def test_is_same_size(self): 7028 t1 = torch.empty(3, 4, 9, 10) 7029 t2 = torch.empty(3, 4) 7030 t3 = torch.empty(1, 9, 3, 3) 7031 t4 = torch.empty(3, 4, 9, 10) 7032 7033 self.assertFalse(t1.is_same_size(t2)) 7034 self.assertFalse(t1.is_same_size(t3)) 7035 self.assertTrue(t1.is_same_size(t4)) 7036 7037 nt1 = torch.nested.nested_tensor([torch.ones(2, 4), torch.ones(3, 4), torch.ones(5, 4)]) 7038 nt2 = torch.nested.nested_tensor([torch.ones(2, 4), torch.ones(2, 4), torch.ones(2, 4)]) 7039 nt3 = torch.nested.nested_tensor([torch.ones(2, 4, 5), torch.ones(2, 6, 5)]) 7040 nt4 = torch.nested.nested_tensor([torch.ones(2, 4), torch.ones(3, 4), torch.ones(5, 4)]) 7041 7042 self.assertFalse(nt1.is_same_size(nt2)) 7043 self.assertFalse(nt1.is_same_size(nt3)) 7044 self.assertTrue(nt1.is_same_size(nt4)) 7045 with self.assertRaisesRegex(RuntimeError, "Expected both self and other to be nested tensors."): 7046 t1.is_same_size(nt1) 7047 7048 with self.assertRaisesRegex(RuntimeError, "Expected both self and other to be nested tensors."): 7049 nt1.is_same_size(t1) 7050 7051 def test_tensor_set(self): 7052 t1 = torch.tensor([]) 7053 t2 = torch.empty(3, 4, 9, 10).uniform_() 7054 t1.set_(t2) 7055 self.assertEqual(t1.storage()._cdata, t2.storage()._cdata) 7056 size = torch.Size([9, 3, 4, 10]) 7057 t1.set_(t2.storage(), 0, size) 7058 self.assertEqual(t1.size(), size) 7059 t1.set_(t2.storage(), 0, tuple(size)) 7060 self.assertEqual(t1.size(), size) 7061 self.assertEqual(t1.stride(), (120, 40, 10, 1)) 7062 stride = (10, 360, 90, 1) 7063 t1.set_(t2.storage(), 0, size, stride) 7064 self.assertEqual(t1.stride(), stride) 7065 t1.set_(t2.storage(), 0, size=size, stride=stride) 7066 self.assertEqual(t1.size(), size) 7067 self.assertEqual(t1.stride(), stride) 7068 7069 # test argument names 7070 t1 = torch.tensor([]) 7071 # 1. case when source is tensor 7072 t1.set_(source=t2) 7073 self.assertEqual(t1.storage()._cdata, t2.storage()._cdata) 7074 # 2. case when source is storage 7075 t1.set_(source=t2.storage()) 7076 self.assertEqual(t1.storage()._cdata, t2.storage()._cdata) 7077 # 3. 
case when source is storage, and other args also specified
        t1.set_(source=t2.storage(), storage_offset=0, size=size, stride=stride)
        self.assertEqual(t1.size(), size)
        self.assertEqual(t1.stride(), stride)

        t1 = torch.tensor([True, True], dtype=torch.bool)
        t2 = torch.tensor([False, False], dtype=torch.bool)
        t1.set_(t2)
        self.assertEqual(t1.storage()._cdata, t2.storage()._cdata)

    def test_tensor_set_errors(self):
        f_cpu = torch.randn((2, 3), dtype=torch.float32)
        d_cpu = torch.randn((2, 3), dtype=torch.float64)

        # change dtype
        self.assertRaises(RuntimeError, lambda: f_cpu.set_(d_cpu.storage()))
        self.assertRaises(RuntimeError,
                          lambda: f_cpu.set_(d_cpu.storage(), 0, d_cpu.size(), d_cpu.stride()))
        self.assertRaises(RuntimeError, lambda: f_cpu.set_(d_cpu))

        # change device
        if torch.cuda.is_available():
            f_cuda = torch.randn((2, 3), dtype=torch.float32, device='cuda')

            # cpu -> cuda
            self.assertRaises(RuntimeError, lambda: f_cpu.set_(f_cuda.storage()))
            self.assertRaises(RuntimeError,
                              lambda: f_cpu.set_(f_cuda.storage(), 0, f_cuda.size(), f_cuda.stride()))
            self.assertRaises(RuntimeError, lambda: f_cpu.set_(f_cuda))

            # cuda -> cpu
            self.assertRaises(RuntimeError, lambda: f_cuda.set_(f_cpu.storage()))
            self.assertRaises(RuntimeError,
                              lambda: f_cuda.set_(f_cpu.storage(), 0, f_cpu.size(), f_cpu.stride()))
            self.assertRaises(RuntimeError, lambda: f_cuda.set_(f_cpu))

    # FIXME: move this test to test_testing.py (along with allclose testing)
    # NOTE: test_equal will be deprecated in favor of torch.testing.assert_close
    # once torch.testing is out of beta
    def test_equal(self):
        for device in ["cpu", "cuda"]:
            if device == "cuda" and not torch.cuda.is_available():
                continue

            # Contiguous, 1D
            t1 = torch.tensor((3., 4., 9., 10.), device=device)
            t2 = t1.contiguous()
            t3 = torch.tensor((1., 9., 3., 10.), device=device)
            t4 = torch.tensor((3., 4., 9.), device=device)
            t5 = torch.tensor([], device=device)
            self.assertTrue(t1.equal(t2))
            self.assertFalse(t1.equal(t3))
            self.assertFalse(t1.equal(t4))
            self.assertFalse(t1.equal(t5))
            self.assertTrue(torch.equal(t1, t2))
            self.assertFalse(torch.equal(t1, t3))
            self.assertFalse(torch.equal(t1, t4))
            self.assertFalse(torch.equal(t1, t5))

            # Non-contiguous, 2D
            s = torch.tensor(((1, 2, 3, 4), (5, 6, 7, 8)), device=device)
            s1 = s[:, 1:3]
            s2 = s1.clone()
            s3 = torch.tensor(((2, 3), (6, 7)), device=device)
            s4 = torch.tensor(((0, 0), (0, 0)), device=device)

            self.assertFalse(s1.is_contiguous())
            self.assertTrue(s1.equal(s2))
            self.assertTrue(s1.equal(s3))
            self.assertFalse(s1.equal(s4))
            self.assertTrue(torch.equal(s1, s2))
            self.assertTrue(torch.equal(s1, s3))
            self.assertFalse(torch.equal(s1, s4))

            # Different dtypes
            x = torch.tensor((1, 2, 3), dtype=torch.float, device=device)
            y = torch.tensor((1, 2, 3), dtype=torch.int, device=device)
            z = torch.tensor((1, -1), dtype=torch.int, device=device)
            self.assertTrue(torch.equal(x, y))
            self.assertFalse(torch.equal(z, x))

            # Fast path test: tensor flags, like neg and conj
            neg_0 = torch.tensor((1, 2, 3), dtype=torch.float, device=device)
            neg_1 = neg_0._neg_view()
            self.assertTrue(neg_1.is_neg())
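            # (_neg_view shares storage and only flips the negation bit, so all the
            # metadata compared below matches while the logical values are negated,
            # which is why equal() must still return False.)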
self.assertEqual(neg_0.data_ptr(), neg_1.data_ptr()) 7164 self.assertEqual(neg_0.storage_offset(), neg_1.storage_offset()) 7165 self.assertEqual(neg_0.stride(), neg_1.stride()) 7166 self.assertEqual(neg_0.size(), neg_1.size()) 7167 self.assertFalse(torch.equal(neg_0, neg_1)) 7168 # FIXME: Disable the following check due to the inductor failure 7169 # See https://github.com/pytorch/pytorch/issues/100340 and 7170 # https://github.com/pytorch/pytorch/issues/98175 7171 if not TEST_WITH_TORCHINDUCTOR: 7172 self.assertTrue(torch.equal(neg_0, neg_1._neg_view())) 7173 7174 conj_0 = torch.tensor([1.0 + 2.0j, 2.0 + 1.0j], device=device) 7175 conj_1 = conj_0.conj() 7176 self.assertTrue(conj_1.is_conj()) 7177 self.assertEqual(conj_0.data_ptr(), conj_1.data_ptr()) 7178 self.assertEqual(conj_0.storage_offset(), conj_1.storage_offset()) 7179 self.assertEqual(conj_0.stride(), conj_1.stride()) 7180 self.assertEqual(conj_0.size(), conj_1.size()) 7181 self.assertFalse(torch.equal(conj_0, conj_1)) 7182 # FIXME: Disable the following check due to the inductor failure 7183 # See https://github.com/pytorch/pytorch/issues/100340 and 7184 # https://github.com/pytorch/pytorch/issues/98175 7185 if not TEST_WITH_TORCHINDUCTOR: 7186 self.assertTrue(torch.equal(conj_0, conj_1.conj())) 7187 7188 # Fast path test: two tensors share the same storage, but different dtype 7189 s_0 = torch.rand((2, 3), dtype=torch.float, device=device) 7190 s_1 = s_0.view(dtype=torch.int32) 7191 self.assertEqual(s_0.data_ptr(), s_1.data_ptr()) 7192 self.assertEqual(s_0.storage_offset(), s_1.storage_offset()) 7193 self.assertEqual(s_0.stride(), s_1.stride()) 7194 self.assertEqual(s_0.size(), s_1.size()) 7195 self.assertFalse(torch.equal(s_0, s_1)) 7196 7197 # Fast path test: two tensors share the same storage, but different strides 7198 t_0 = torch.rand((2, 3), dtype=torch.float, device=device) 7199 t_1 = t_0.t() 7200 self.assertEqual(t_0.data_ptr(), t_1.data_ptr()) 7201 self.assertEqual(t_0.storage_offset(), t_1.storage_offset()) 7202 self.assertNotEqual(t_0.stride(), t_1.stride()) 7203 self.assertNotEqual(t_0.size(), t_1.size()) 7204 self.assertFalse(torch.equal(t_0, t_1)) 7205 7206 # Fast path: tensor containing `nan` is not equal to self 7207 for dtype in floating_and_complex_types(): 7208 t = torch.tensor([1., float('nan')], dtype=dtype) 7209 self.assertFalse(torch.equal(t, t)) 7210 7211 def test_element_size(self): 7212 byte = torch.ByteStorage().element_size() 7213 char = torch.CharStorage().element_size() 7214 short = torch.ShortStorage().element_size() 7215 int = torch.IntStorage().element_size() 7216 long = torch.LongStorage().element_size() 7217 float = torch.FloatStorage().element_size() 7218 double = torch.DoubleStorage().element_size() 7219 bool = torch.BoolStorage().element_size() 7220 bfloat16 = torch.BFloat16Storage().element_size() 7221 complexfloat = torch.ComplexFloatStorage().element_size() 7222 complexdouble = torch.ComplexDoubleStorage().element_size() 7223 7224 self.assertEqual(byte, torch.ByteTensor().element_size()) 7225 self.assertEqual(byte, torch.ByteTensor().itemsize) 7226 self.assertEqual(char, torch.CharTensor().element_size()) 7227 self.assertEqual(char, torch.CharTensor().itemsize) 7228 self.assertEqual(short, torch.ShortTensor().element_size()) 7229 self.assertEqual(short, torch.ShortTensor().itemsize) 7230 self.assertEqual(int, torch.IntTensor().element_size()) 7231 self.assertEqual(int, torch.IntTensor().itemsize) 7232 self.assertEqual(long, torch.LongTensor().element_size()) 7233 self.assertEqual(long, 
torch.LongTensor().itemsize) 7234 self.assertEqual(float, torch.FloatTensor().element_size()) 7235 self.assertEqual(float, torch.FloatTensor().itemsize) 7236 self.assertEqual(double, torch.DoubleTensor().element_size()) 7237 self.assertEqual(double, torch.DoubleTensor().itemsize) 7238 self.assertEqual(bool, torch.BoolTensor().element_size()) 7239 self.assertEqual(bool, torch.BoolTensor().itemsize) 7240 self.assertEqual(bfloat16, torch.tensor([], dtype=torch.bfloat16).element_size()) 7241 self.assertEqual(bfloat16, torch.tensor([], dtype=torch.bfloat16).itemsize) 7242 self.assertEqual(complexfloat, torch.tensor([], dtype=torch.complex64).element_size()) 7243 self.assertEqual(complexfloat, torch.tensor([], dtype=torch.complex64).itemsize) 7244 self.assertEqual(complexdouble, torch.tensor([], dtype=torch.complex128).element_size()) 7245 self.assertEqual(complexdouble, torch.tensor([], dtype=torch.complex128).itemsize) 7246 7247 self.assertGreater(byte, 0) 7248 self.assertGreater(char, 0) 7249 self.assertGreater(short, 0) 7250 self.assertGreater(int, 0) 7251 self.assertGreater(long, 0) 7252 self.assertGreater(float, 0) 7253 self.assertGreater(double, 0) 7254 self.assertGreater(bool, 0) 7255 self.assertGreater(bfloat16, 0) 7256 self.assertGreater(complexfloat, 0) 7257 self.assertGreater(complexdouble, 0) 7258 7259 # These tests are portable, not necessarily strict for your system. 7260 self.assertEqual(byte, 1) 7261 self.assertEqual(char, 1) 7262 self.assertEqual(bool, 1) 7263 self.assertGreaterEqual(short, 2) 7264 self.assertGreaterEqual(int, 2) 7265 self.assertGreaterEqual(int, short) 7266 self.assertGreaterEqual(long, 4) 7267 self.assertGreaterEqual(long, int) 7268 self.assertGreaterEqual(double, float) 7269 7270 def test_permute(self): 7271 orig = [1, 2, 3, 4, 5, 6, 7] 7272 perm = torch.randperm(7).tolist() 7273 x = torch.empty(*orig).fill_(0) 7274 new = [i - 1 for i in x.permute(*perm).size()] 7275 self.assertEqual(perm, new) 7276 self.assertEqual(x.size(), orig) 7277 7278 @skipIfTorchDynamo("TorchDynamo fails with unknown reason") 7279 def test_reversed(self): 7280 val = torch.arange(0, 10) 7281 self.assertEqual(reversed(val), torch.arange(9, -1, -1)) 7282 7283 val = torch.arange(1, 10).view(3, 3) 7284 self.assertEqual(reversed(val), torch.tensor([[7, 8, 9], [4, 5, 6], [1, 2, 3]])) 7285 7286 val = torch.tensor(42) 7287 self.assertEqual(reversed(val), torch.tensor(42)) 7288 7289 def test_contains(self): 7290 x = torch.arange(0, 10) 7291 self.assertEqual(4 in x, True) 7292 self.assertEqual(12 in x, False) 7293 7294 x = torch.arange(1, 10).view(3, 3) 7295 val = torch.arange(1, 4) 7296 self.assertEqual(val in x, True) 7297 val += 10 7298 self.assertEqual(val in x, False) 7299 7300 self.assertRaisesRegex( 7301 RuntimeError, 7302 f"Tensor.__contains__ only supports Tensor or scalar, but you passed in a {str}.", 7303 lambda: "foo" in x) 7304 self.assertRaisesRegex( 7305 RuntimeError, 7306 f"Tensor.__contains__ only supports Tensor or scalar, but you passed in a {type([1, 2])}.", 7307 lambda: [1, 2] in x) 7308 7309 @skipIfTorchDynamo("TorchDynamo fails with unknown reason") 7310 def test_deepcopy_parameter(self): 7311 from copy import deepcopy 7312 l = torch.nn.Linear(10, 1) 7313 s = l.state_dict(keep_vars=True) 7314 self.assertEqual(torch.nn.Parameter, type(s['weight'])) 7315 self.assertEqual(torch.nn.Parameter, type(s['bias'])) 7316 7317 s2 = deepcopy(s) 7318 self.assertEqual(torch.nn.Parameter, type(s2['weight'])) 7319 self.assertEqual(torch.nn.Parameter, type(s2['bias'])) 7320 7321 def 
test_pickle(self): 7322 import pickle 7323 a = torch.randn(5, 5) 7324 serialized = pickle.dumps(a) 7325 b = pickle.loads(serialized) 7326 self.assertEqual(a, b) 7327 7328 @skipIfTorchDynamo("TorchDynamo fails with unknown reason") 7329 def test_pickle_parameter(self): 7330 import pickle 7331 a = torch.nn.Parameter(torch.randn(5, 5)) 7332 serialized = pickle.dumps(a) 7333 b = pickle.loads(serialized) 7334 self.assertTrue(isinstance(b, torch.nn.Parameter)) 7335 self.assertEqual(a.requires_grad, b.requires_grad) 7336 self.assertEqual(a, b) 7337 7338 @skipIfTorchDynamo("TorchDynamo fails with unknown reason") 7339 def test_pickle_parameter_no_requires_grad(self): 7340 import pickle 7341 a = torch.nn.Parameter(torch.randn(5, 5), requires_grad=False) 7342 serialized = pickle.dumps(a) 7343 b = pickle.loads(serialized) 7344 self.assertTrue(isinstance(b, torch.nn.Parameter)) 7345 self.assertEqual(a.requires_grad, b.requires_grad) 7346 self.assertEqual(a, b) 7347 7348 def test_pickle_dtype(self): 7349 t = torch.float32 7350 serialized = pickle.dumps(t) 7351 b = pickle.loads(serialized) 7352 self.assertTrue(isinstance(b, torch.dtype)) 7353 self.assertEqual(id(b), id(t)) 7354 7355 def test_pickle_size(self): 7356 a = torch.rand(10).size() 7357 serialized = pickle.dumps(a) 7358 b = pickle.loads(serialized) 7359 self.assertTrue(isinstance(b, torch.Size)) 7360 self.assertEqual(a, b) 7361 7362 def test_pickle_function(self): 7363 # https://github.com/pytorch/pytorch/issues/37703 7364 a = torch.tanh 7365 serialized = pickle.dumps(a) 7366 b = pickle.loads(serialized) 7367 self.assertEqual(a, b) 7368 7369 def test_generator_cpu(self): 7370 # test default generators are equal 7371 self.assertEqual(torch.default_generator, torch.default_generator) 7372 7373 # tests Generator API 7374 # manual_seed, seed, initial_seed, get_state, set_state 7375 g1 = torch.Generator() 7376 g2 = torch.Generator() 7377 g1.manual_seed(12345) 7378 g2.manual_seed(12345) 7379 self.assertEqual(g1.initial_seed(), g2.initial_seed()) 7380 7381 g1.seed() 7382 g2.seed() 7383 self.assertNotEqual(g1.initial_seed(), g2.initial_seed()) 7384 7385 g1 = torch.Generator() 7386 g2_state = g2.get_state() 7387 g2_randn = torch.randn(1, generator=g2) 7388 g1.set_state(g2_state) 7389 g1_randn = torch.randn(1, generator=g1) 7390 self.assertEqual(g1_randn, g2_randn) 7391 7392 default_state = torch.default_generator.get_state() 7393 q = torch.empty(100) 7394 g1_normal = q.normal_() 7395 g2 = torch.Generator() 7396 g2.set_state(default_state) 7397 g2_normal = q.normal_(generator=g2) 7398 self.assertEqual(g1_normal, g2_normal) 7399 7400 def test_invalid_generator_raises(self): 7401 self.assertRaises(RuntimeError, lambda: torch.Generator('opengl')) 7402 7403 def test_pickle_generator(self) -> None: 7404 devices = ['cpu'] 7405 if torch.cuda.is_available(): 7406 devices += ['cuda'] 7407 7408 for device in devices: 7409 with self.subTest(device=device): 7410 generator = torch.Generator(device=device).manual_seed(12345) 7411 if device != "cpu": 7412 generator.set_offset(100) 7413 torch.randn((100, 100), generator=generator, device=device) # progress the RNG state 7414 7415 reserialized: torch.Generator = pickle.loads(pickle.dumps(generator)) 7416 7417 self.assertEqual(generator.device, reserialized.device) 7418 self.assertEqual(generator.initial_seed(), reserialized.initial_seed()) 7419 if device != "cpu": 7420 self.assertEqual(generator.get_offset(), reserialized.get_offset()) 7421 torch.testing.assert_close(generator.get_state(), reserialized.get_state()) 7422 
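    # Exposition-only sketch (not part of the original suite): pickling aside,
    # get_state()/set_state() snapshot and rewind a Generator, so two draws
    # taken from the same restored state must produce identical streams.
    def test_generator_state_roundtrip_sketch(self):
        g = torch.Generator().manual_seed(42)
        _ = torch.randn(3, generator=g)  # advance the stream past the seed
        state = g.get_state()            # snapshot the RNG state
        a = torch.randn(5, generator=g)
        g.set_state(state)               # rewind to the snapshot
        b = torch.randn(5, generator=g)
        self.assertEqual(a, b)
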
7423 def _sobol_reference_samples(self, scramble: bool) -> torch.Tensor: 7424 if not scramble: 7425 # theoretical values from Joe Kuo 2010 7426 return torch.tensor( 7427 [ 7428 [0., 0.], 7429 [0.5, 0.5], 7430 [0.75, 0.25], 7431 [0.25, 0.75], 7432 [0.375, 0.375], 7433 [0.875, 0.875], 7434 [0.625, 0.125], 7435 [0.125, 0.625], 7436 ], 7437 ) 7438 else: 7439 # theoretical values unknown: convergence properties checked 7440 return torch.tensor( 7441 [ 7442 [0.50860737, 0.29320504], 7443 [0.07116939, 0.89594537], 7444 [0.49354145, 0.11524881], 7445 [0.93097717, 0.70244044], 7446 [0.87266153, 0.23887917], 7447 [0.31021884, 0.57600391], 7448 [0.13687253, 0.42054182], 7449 [0.69931293, 0.77336788], 7450 ], 7451 ) 7452 7453 def test_sobolengine_bounds(self, scramble: bool = False): 7454 engine = torch.quasirandom.SobolEngine(100, scramble=scramble, seed=123456) 7455 sample = engine.draw(512) 7456 self.assertTrue(torch.all(sample >= 0)) 7457 self.assertTrue(torch.all(sample <= 1)) 7458 7459 def test_sobolengine_bounds_scrambled(self): 7460 self.test_sobolengine_bounds(scramble=True) 7461 7462 def test_sobolengine_draw(self, scramble: bool = False): 7463 ref_sample = self._sobol_reference_samples(scramble=scramble) 7464 engine = torch.quasirandom.SobolEngine(2, scramble=scramble, seed=123456) 7465 sample = engine.draw(n=len(ref_sample)) 7466 self.assertEqual(sample, ref_sample) 7467 self.assertEqual(engine.num_generated, len(ref_sample)) 7468 7469 def test_sobolengine_draw_scrambled(self): 7470 self.test_sobolengine_draw(scramble=True) 7471 7472 def test_sobolengine_first_point(self): 7473 for dtype in (torch.float, torch.double): 7474 engine = torch.quasirandom.SobolEngine(2, scramble=False) 7475 sample = engine.draw(1, dtype=dtype) 7476 self.assertTrue(torch.all(sample == 0)) 7477 self.assertEqual(sample.dtype, dtype) 7478 for dtype in (torch.float, torch.double): 7479 engine = torch.quasirandom.SobolEngine(2, scramble=True, seed=123456) 7480 sample = engine.draw(1, dtype=dtype) 7481 self.assertTrue(torch.all(sample != 0)) 7482 self.assertEqual(sample.dtype, dtype) 7483 7484 def test_sobolengine_continuing(self, scramble: bool = False): 7485 ref_sample = self._sobol_reference_samples(scramble=scramble) 7486 engine = torch.quasirandom.SobolEngine(2, scramble=scramble, seed=123456) 7487 n_half = len(ref_sample) // 2 7488 _ = engine.draw(n=n_half) 7489 sample = engine.draw(n=n_half) 7490 torch.testing.assert_close(sample, ref_sample[n_half:]) 7491 7492 def test_sobolengine_continuing_scrambled(self): 7493 self.test_sobolengine_continuing(scramble=True) 7494 7495 def test_sobolengine_reset(self, scramble: bool = False): 7496 ref_sample = self._sobol_reference_samples(scramble=scramble) 7497 engine = torch.quasirandom.SobolEngine(2, scramble=scramble, seed=123456) 7498 _ = engine.draw(n=len(ref_sample) // 2) 7499 engine.reset() 7500 self.assertEqual(engine.num_generated, 0) 7501 sample = engine.draw(n=len(ref_sample)) 7502 torch.testing.assert_close(sample, ref_sample) 7503 7504 def test_sobolengine_reset_scrambled(self): 7505 self.test_sobolengine_reset(scramble=True) 7506 7507 def test_sobolengine_fast_forward(self, scramble: bool = False): 7508 ref_sample = self._sobol_reference_samples(scramble=scramble) 7509 engine = torch.quasirandom.SobolEngine(2, scramble=scramble, seed=123456) 7510 engine.fast_forward(4) 7511 sample = engine.draw(n=4) 7512 torch.testing.assert_close(sample, ref_sample[4:]) 7513 # alternate fast forwarding with sampling 7514 engine.reset() 7515 even_draws = [] 7516 for i in 
range(8):
            if i % 2 == 0:
                even_draws.append(engine.draw())
            else:
                engine.fast_forward(1)
        torch.testing.assert_close(
            ref_sample[[i for i in range(8) if i % 2 == 0]],
            torch.from_numpy(np.concatenate(even_draws)),
        )

    def test_sobolengine_fast_forward_scrambled(self):
        self.test_sobolengine_fast_forward(scramble=True)

    def test_sobolengine_default_dtype(self):
        engine = torch.quasirandom.SobolEngine(dimension=3, scramble=True, seed=123456)
        # Check that default dtype is correctly handled
        self.assertEqual(engine.draw(n=5).dtype, torch.float32)
        with set_default_dtype(torch.float64):
            engine = torch.quasirandom.SobolEngine(dimension=3, scramble=True, seed=123456)
            # Check that default dtype is correctly handled (when set to float64)
            self.assertEqual(engine.draw(n=5).dtype, torch.float64)
            # Check that explicitly passed dtype is adhered to
            self.assertEqual(engine.draw(n=5, dtype=torch.float32).dtype, torch.float32)
            # Reinitialize the engine and check that first draw dtype is correctly handled
            engine = torch.quasirandom.SobolEngine(dimension=3, scramble=True, seed=123456)
            self.assertEqual(engine.draw(n=5, dtype=torch.float32).dtype, torch.float32)

    @skipIfTorchDynamo("np.float64 restored as float32 after graph break.")
    def test_sobolengine_distribution(self, scramble=False):
        d = 50
        engine = torch.quasirandom.SobolEngine(d, scramble=scramble, seed=123456)
        sample = engine.draw(1024)
        torch.testing.assert_close(
            torch.mean(sample, dim=0), torch.full((d,), 0.5), atol=2, rtol=2
        )
        torch.testing.assert_close(
            np.percentile(sample, 25, axis=0), np.repeat(0.25, d), atol=2, rtol=2
        )
        torch.testing.assert_close(
            np.percentile(sample, 75, axis=0), np.repeat(0.75, d), atol=2, rtol=2
        )

    @skipIfTorchDynamo("np.float64 restored as float32 after graph break.")
    def test_sobolengine_distribution_scrambled(self):
        self.test_sobolengine_distribution(scramble=True)

    def test_sobolengine_draw_base2(self, scramble=False):
        ref_sample = self._sobol_reference_samples(scramble=scramble)
        engine = torch.quasirandom.SobolEngine(2, scramble=scramble, seed=123456)
        sample = engine.draw_base2(2)
        self.assertEqual(ref_sample[:4], sample)
        # drawing again keeps the total sample count at a power of two (N = 2 ** n)
        sample = engine.draw_base2(2)
        self.assertEqual(ref_sample[4:8], sample)

    def test_sobolengine_draw_base2_scrambled(self):
        self.test_sobolengine_draw_base2(scramble=True)

    def test_sobolengine_raise(self):
        maxdim = torch.quasirandom.SobolEngine.MAXDIM
        with self.assertRaises(ValueError):
            torch.quasirandom.SobolEngine(maxdim + 1)

    def test_sobolengine_high_dim(self):
        engine = torch.quasirandom.SobolEngine(1111, scramble=False, seed=123456)
        samples1 = engine.draw()
        vals1, counts1 = torch.unique(samples1, return_counts=True)
        samples2 = engine.draw()
        vals2, counts2 = torch.unique(samples2, return_counts=True)
        self.assertEqual(vals1.item(), 0.0)
        self.assertEqual(counts1.item(), 1111)
        self.assertEqual(vals2.item(), 0.5)
        # check the second draw's counts (the original checked counts1 twice)
        self.assertEqual(counts2.item(), 1111)

    def test_parsing_int64(self):
        # accepts integer arguments
        x = torch.cumsum(torch.ones(5, 5), 0)
        self.assertEqual(x, torch.cumsum(torch.ones(5, 5), torch.tensor(0)))
        # doesn't accept floating point variables
        self.assertRaises(TypeError, lambda: torch.cumsum(torch.ones(5, 5), torch.tensor(0.)))
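
    # Exposition-only sketch (not part of the original suite): the implicit
    # 0-dim-integer-tensor -> int64 conversion exercised above for cumsum is a
    # property of the argument parser, so a sibling op with the same `dim`
    # signature, such as cumprod, should behave the same way.
    def test_parsing_int64_cumprod_sketch(self):
        x = torch.ones(5, 5)
        # a 0-dim integer tensor is accepted where an int64 is expected
        self.assertEqual(torch.cumprod(x, 0), torch.cumprod(x, torch.tensor(0)))
        # a 0-dim floating-point tensor is rejected, as for cumsum above
        self.assertRaises(TypeError, lambda: torch.cumprod(x, torch.tensor(0.)))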

    def test_parsing_double(self):
        # accepts floating point and integer arguments
        x = torch.randn(2, 3)
        self.assertTrue(torch.isclose(x, x, 1, 1).all())
        self.assertTrue(torch.isclose(x, x, 1.5, 1.).all())
        # accepts floating point and integer tensors
        self.assertTrue(torch.isclose(x, x, torch.tensor(1), torch.tensor(1)).all())
        self.assertTrue(torch.isclose(x, x, torch.tensor(1.5), torch.tensor(1.)).all())
        # doesn't accept variables with requires_grad
        self.assertRaises(TypeError,
                          lambda: torch.isclose(x, x, torch.tensor(1.5), torch.tensor(1., requires_grad=True)).all())

    def test_parsing_intlist(self):
        # parse with integer variables
        self.assertEqual(torch.Size([3, 4]), torch.ones((torch.tensor(3), torch.tensor(4))).shape)
        self.assertEqual(torch.Size([3, 4]), torch.ones(torch.tensor(3), torch.tensor(4)).shape)
        # parse with numpy integers
        self.assertEqual(torch.Size([3, 4]), torch.ones((np.array(3), np.int64(4))).shape)
        self.assertEqual(torch.Size([3, 4]), torch.ones(np.array(3), np.int64(4)).shape)
        self.assertEqual(torch.Size([3, 4]), torch.ones((np.int64(3), np.array(4))).shape)
        self.assertEqual(torch.Size([3, 4]), torch.ones(np.int64(3), np.array(4)).shape)

        # fail parse with float variables
        self.assertRaises(TypeError, lambda: torch.ones((torch.tensor(3.), torch.tensor(4))))
        # fail parse with numpy floats
        self.assertRaises(TypeError, lambda: torch.ones((3., torch.tensor(4))))
        self.assertRaises(TypeError, lambda: torch.ones((np.array(3.), torch.tensor(4))))

        # fail parse with > 1 element variables
        self.assertRaises(TypeError, lambda: torch.ones(torch.tensor(3, 3)))
        self.assertRaises(TypeError, lambda: torch.ones(np.array(3, 3)))

        # fail parse with additional positional args after intlist arg
        self.assertRaisesRegex(TypeError,
                               "received an invalid combination of arguments",
                               lambda: torch.LongTensor((6, 0), 1, 1, 0))
        self.assertRaisesRegex(TypeError,
                               "missing 1 required positional arguments",
                               lambda: torch.tensor().new_zeros((5, 5), 0))

    def test_from_buffer(self):
        a = bytearray([1, 2, 3, 4])
        self.assertEqual(torch.ByteStorage.from_buffer(a).tolist(), [1, 2, 3, 4])
        shorts = torch.ShortStorage.from_buffer(a, 'big')
        self.assertEqual(shorts.size(), 2)
        # big-endian byte pairs: 0x0102 == 258, 0x0304 == 772
        self.assertEqual(shorts.tolist(), [258, 772])
        ints = torch.IntStorage.from_buffer(a, 'little')
        self.assertEqual(ints.size(), 1)
        # little-endian 4-byte word: 0x04030201 == 67305985
        self.assertEqual(ints[0], 67305985)
        f = bytearray([0x40, 0x10, 0x00, 0x00])
        floats = torch.FloatStorage.from_buffer(f, 'big')
        self.assertEqual(floats.size(), 1)
        # big-endian IEEE-754 bit pattern 0x40100000 == 2.25
        self.assertEqual(floats[0], 2.25)

        f = bytearray([0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x10, 0x40])
        bools = torch.BoolStorage.from_buffer(f, 'big')
        self.assertEqual(bools.size(), 8)
        self.assertEqual(bools.tolist(), [False, True, True, True, True, True, True, True])
        self.assertEqual(bools.type(), 'torch.BoolStorage')
        self.assertTrue(isinstance(bools, torch.BoolStorage))

        f = bytearray(b'\x80\x02\x8a\nl\xfc\x9cF\xf9 j\xa8P\x19.\x80\x02M\xe9')
        bools = torch.BoolStorage.from_buffer(f, 'big')
        self.assertEqual(bools.size(), 19)

        # NB: the next buffer is b'\0x4A', which is four bytes
        # (\x00, 'x', '4', 'A'), not the single byte 0x4A; the size and
        # tolist checks that follow rely on that.
        f =
bytearray(b'\0x4A') 7666 bools = torch.BoolStorage.from_buffer(f, 'big') 7667 self.assertEqual(bools.size(), 4) 7668 self.assertEqual(bools.tolist(), [False, True, True, True]) 7669 bytes = torch.ByteStorage.from_buffer(a) 7670 self.assertEqual(bytes.nbytes(), 4) 7671 self.assertEqual(bytes.tolist(), [1, 2, 3, 4]) 7672 self.assertTrue(isinstance(bytes, torch.ByteStorage)) 7673 7674 def test_storage_error(self): 7675 quantized_storages = [ 7676 torch.QInt32Storage, 7677 torch.QInt8Storage, 7678 torch.QUInt2x4Storage, 7679 torch.QUInt4x2Storage, 7680 torch.QUInt8Storage, 7681 ] 7682 7683 with self.assertRaisesRegex(RuntimeError, r"Only child classes of _LegacyStorage can be instantiated"): 7684 torch.storage._LegacyStorage() 7685 7686 for storage_class in torch._storage_classes: 7687 if storage_class in [torch.UntypedStorage, torch.TypedStorage]: 7688 continue 7689 7690 device = 'cuda' if storage_class.__module__ == 'torch.cuda' else 'cpu' 7691 dtype = storage_class.dtype 7692 7693 if device == 'cuda' and not torch.cuda.is_available(): 7694 continue 7695 7696 # Legacy <type>Storage constructor errors 7697 with self.assertRaisesRegex(RuntimeError, r"'device' cannot be specified"): 7698 storage_class(device='cpu') 7699 7700 with self.assertRaisesRegex(RuntimeError, r"'dtype' cannot be specified"): 7701 storage_class(dtype=torch.float) 7702 7703 with self.assertRaisesRegex(TypeError, r"got an unexpected keyword"): 7704 storage_class(sdlkjf=torch.float) 7705 7706 with self.assertRaisesRegex(RuntimeError, r"Too many positional arguments"): 7707 storage_class(0, 0) 7708 7709 with self.assertRaisesRegex(TypeError, r"invalid data type"): 7710 storage_class('string') 7711 7712 with self.assertRaisesRegex(TypeError, r"Argument type not recognized"): 7713 storage_class(torch.tensor([])) 7714 7715 s = storage_class() 7716 7717 with self.assertRaisesRegex(RuntimeError, r"No positional arguments"): 7718 storage_class(0, wrap_storage=s.untyped()) 7719 7720 with self.assertRaisesRegex(TypeError, r"must be UntypedStorage"): 7721 storage_class(wrap_storage=s) 7722 7723 if torch.cuda.is_available(): 7724 if storage_class in quantized_storages: 7725 with self.assertRaisesRegex(RuntimeError, r"Cannot create CUDA storage with quantized dtype"): 7726 s.cuda() 7727 7728 else: 7729 7730 if s.is_cuda: 7731 s_other_device = s.cpu() 7732 else: 7733 s_other_device = s.cuda() 7734 7735 with self.assertRaisesRegex(RuntimeError, r"Device of 'wrap_storage' must be"): 7736 storage_class(wrap_storage=s_other_device.untyped()) 7737 7738 # TypedStorage constructor errors 7739 with self.assertRaisesRegex(RuntimeError, r"No positional arguments"): 7740 torch.TypedStorage(0, wrap_storage=s.untyped(), dtype=dtype) 7741 7742 with self.assertRaisesRegex(RuntimeError, r"Argument 'dtype' must be specified"): 7743 torch.TypedStorage(wrap_storage=s.untyped()) 7744 7745 with self.assertRaisesRegex(TypeError, r"Argument 'dtype' must be torch.dtype"): 7746 torch.TypedStorage(wrap_storage=s.untyped(), dtype=0) 7747 7748 with self.assertRaisesRegex(RuntimeError, r"Argument 'device' should not be specified"): 7749 torch.TypedStorage(wrap_storage=s.untyped(), dtype=dtype, device=device) 7750 7751 with self.assertRaisesRegex(TypeError, r"Argument 'wrap_storage' must be UntypedStorage"): 7752 torch.TypedStorage(wrap_storage=s, dtype=dtype) 7753 7754 with self.assertRaisesRegex(RuntimeError, r"Storage device not recognized"): 7755 torch.TypedStorage(dtype=dtype, device='xla') 7756 7757 if torch.cuda.is_available(): 7758 if storage_class in 
quantized_storages: 7759 with self.assertRaisesRegex(RuntimeError, r"Cannot create CUDA storage with quantized dtype"): 7760 torch.TypedStorage(dtype=dtype, device='cuda') 7761 7762 with self.assertRaisesRegex(TypeError, r"Argument type not recognized"): 7763 torch.TypedStorage(torch.tensor([]), dtype=dtype, device=device) 7764 7765 with self.assertRaisesRegex(RuntimeError, r"Too many positional arguments"): 7766 torch.TypedStorage(0, 0, dtype=dtype, device=device) 7767 7768 if isinstance(s, torch.TypedStorage): 7769 s_other = torch.TypedStorage([1, 2, 3, 4], device=device, dtype=dtype) 7770 7771 with self.assertRaisesRegex(RuntimeError, r'cannot set item'): 7772 s.fill_(s_other) 7773 7774 def test_storage_error_no_attribute(self): 7775 storage_classes = [ 7776 torch.cuda.ByteStorage, 7777 torch.cuda.FloatStorage, 7778 ] 7779 for storage_class in storage_classes: 7780 with self.assertRaisesRegex(RuntimeError, r'Not available for CUDA storage'): 7781 storage_class.from_buffer() 7782 7783 with self.assertRaisesRegex(RuntimeError, r'Not available for CUDA storage'): 7784 storage_class._new_with_weak_ptr() 7785 7786 with self.assertRaisesRegex(RuntimeError, r'Not available for CUDA storage'): 7787 storage_class._new_shared_filename(0, 0, 0) 7788 7789 def test_storage_casts(self): 7790 storage = torch.IntStorage([-1, 0, 1, 2, 3, 4]) 7791 self.assertEqual(storage.size(), 6) 7792 self.assertEqual(storage.tolist(), [-1, 0, 1, 2, 3, 4]) 7793 self.assertEqual(storage.type(), 'torch.IntStorage') 7794 self.assertIs(storage.dtype, torch.int32) 7795 7796 floatStorage = storage.float() 7797 self.assertEqual(floatStorage.size(), 6) 7798 self.assertEqual(floatStorage.tolist(), [-1, 0, 1, 2, 3, 4]) 7799 self.assertEqual(floatStorage.type(), 'torch.FloatStorage') 7800 self.assertEqual(floatStorage.int().tolist(), [-1, 0, 1, 2, 3, 4]) 7801 self.assertIs(floatStorage.dtype, torch.float32) 7802 7803 halfStorage = storage.half() 7804 self.assertEqual(halfStorage.size(), 6) 7805 self.assertEqual(halfStorage.tolist(), [-1, 0, 1, 2, 3, 4]) 7806 self.assertEqual(halfStorage.type(), 'torch.HalfStorage') 7807 self.assertEqual(halfStorage.int().tolist(), [-1, 0, 1, 2, 3, 4]) 7808 self.assertIs(halfStorage.dtype, torch.float16) 7809 7810 bfloat16Storage = storage.bfloat16() 7811 self.assertEqual(bfloat16Storage.size(), 6) 7812 self.assertEqual(bfloat16Storage.tolist(), [-1, 0, 1, 2, 3, 4]) 7813 self.assertEqual(bfloat16Storage.type(), 'torch.BFloat16Storage') 7814 self.assertEqual(bfloat16Storage.int().tolist(), [-1, 0, 1, 2, 3, 4]) 7815 self.assertIs(bfloat16Storage.dtype, torch.bfloat16) 7816 7817 longStorage = storage.long() 7818 self.assertEqual(longStorage.size(), 6) 7819 self.assertEqual(longStorage.tolist(), [-1, 0, 1, 2, 3, 4]) 7820 self.assertEqual(longStorage.type(), 'torch.LongStorage') 7821 self.assertEqual(longStorage.int().tolist(), [-1, 0, 1, 2, 3, 4]) 7822 self.assertIs(longStorage.dtype, torch.int64) 7823 7824 shortStorage = storage.short() 7825 self.assertEqual(shortStorage.size(), 6) 7826 self.assertEqual(shortStorage.tolist(), [-1, 0, 1, 2, 3, 4]) 7827 self.assertEqual(shortStorage.type(), 'torch.ShortStorage') 7828 self.assertEqual(shortStorage.int().tolist(), [-1, 0, 1, 2, 3, 4]) 7829 self.assertIs(shortStorage.dtype, torch.int16) 7830 7831 doubleStorage = storage.double() 7832 self.assertEqual(doubleStorage.size(), 6) 7833 self.assertEqual(doubleStorage.tolist(), [-1.0, 0.0, 1.0, 2.0, 3.0, 4.0]) 7834 self.assertEqual(doubleStorage.type(), 'torch.DoubleStorage') 7835 
self.assertEqual(doubleStorage.int().tolist(), [-1, 0, 1, 2, 3, 4]) 7836 self.assertIs(doubleStorage.dtype, torch.float64) 7837 7838 charStorage = storage.char() 7839 self.assertEqual(charStorage.size(), 6) 7840 self.assertEqual(charStorage.tolist(), [-1.0, 0.0, 1.0, 2.0, 3.0, 4.0]) 7841 self.assertEqual(charStorage.type(), 'torch.CharStorage') 7842 self.assertEqual(charStorage.int().tolist(), [-1, 0, 1, 2, 3, 4]) 7843 self.assertIs(charStorage.dtype, torch.int8) 7844 7845 byteStorage = storage.byte() 7846 self.assertEqual(byteStorage.size(), 6) 7847 self.assertEqual(byteStorage.tolist(), [255, 0, 1, 2, 3, 4]) 7848 self.assertEqual(byteStorage.type(), 'torch.ByteStorage') 7849 self.assertEqual(byteStorage.int().tolist(), [255, 0, 1, 2, 3, 4]) 7850 self.assertIs(byteStorage.dtype, torch.uint8) 7851 7852 boolStorage = storage.bool() 7853 self.assertEqual(boolStorage.size(), 6) 7854 self.assertEqual(boolStorage.tolist(), [True, False, True, True, True, True]) 7855 self.assertEqual(boolStorage.type(), 'torch.BoolStorage') 7856 self.assertEqual(boolStorage.int().tolist(), [1, 0, 1, 1, 1, 1]) 7857 self.assertIs(boolStorage.dtype, torch.bool) 7858 7859 complexfloat_storage = torch.ComplexFloatStorage([-1, 0, 1 + 2j, 2.5j, 3.5, 4 - 2j]) 7860 self.assertEqual(complexfloat_storage.size(), 6) 7861 self.assertEqual(complexfloat_storage.tolist(), [-1, 0, 1 + 2j, 2.5j, 3.5, 4 - 2j]) 7862 self.assertEqual(complexfloat_storage.type(), 'torch.ComplexFloatStorage') 7863 self.assertIs(complexfloat_storage.dtype, torch.complex64) 7864 7865 complexdouble_storage = complexfloat_storage.complex_double() 7866 self.assertEqual(complexdouble_storage.size(), 6) 7867 self.assertEqual(complexdouble_storage.tolist(), [-1, 0, 1 + 2j, 2.5j, 3.5, 4 - 2j]) 7868 self.assertEqual(complexdouble_storage.type(), 'torch.ComplexDoubleStorage') 7869 self.assertIs(complexdouble_storage.dtype, torch.complex128) 7870 7871 def test_storage_byteswap(self): 7872 input = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] 7873 swapped_8bytes = [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8] 7874 swapped_4bytes = [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12] 7875 swapped_2bytes = [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14] 7876 swapped_1byte = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] 7877 7878 storage = torch.storage.TypedStorage(input, dtype=torch.uint8)._untyped_storage 7879 7880 storage_f64 = storage.__copy__() 7881 storage_f64.byteswap(torch.float64) 7882 self.assertEqual(storage_f64.tolist(), swapped_8bytes) 7883 7884 storage_f32 = storage.__copy__() 7885 storage_f32.byteswap(torch.float32) 7886 self.assertEqual(storage_f32.tolist(), swapped_4bytes) 7887 7888 storage_f16 = storage.__copy__() 7889 storage_f16.byteswap(torch.float16) 7890 self.assertEqual(storage_f16.tolist(), swapped_2bytes) 7891 7892 storage_bf16 = storage.__copy__() 7893 storage_bf16.byteswap(torch.bfloat16) 7894 self.assertEqual(storage_bf16.tolist(), swapped_2bytes) 7895 7896 storage_i64 = storage.__copy__() 7897 storage_i64.byteswap(torch.int64) 7898 self.assertEqual(storage_i64.tolist(), swapped_8bytes) 7899 7900 storage_i32 = storage.__copy__() 7901 storage_i32.byteswap(torch.int32) 7902 self.assertEqual(storage_i32.tolist(), swapped_4bytes) 7903 7904 storage_i16 = storage.__copy__() 7905 storage_i16.byteswap(torch.int16) 7906 self.assertEqual(storage_i16.tolist(), swapped_2bytes) 7907 7908 storage_i8 = storage.__copy__() 7909 storage_i8.byteswap(torch.int8) 7910 self.assertEqual(storage_i8.tolist(), swapped_1byte) 
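
        # Exposition-only cross-check (not an original assertion): for a fixed
        # element width, byteswap is an involution, so swapping twice with the
        # same dtype must restore the original byte order.
        storage_rt = storage.__copy__()
        storage_rt.byteswap(torch.int16)
        storage_rt.byteswap(torch.int16)
        self.assertEqual(storage_rt.tolist(), input)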
7911 7912 storage_ui8 = storage.__copy__() 7913 storage_ui8.byteswap(torch.uint8) 7914 self.assertEqual(storage_ui8.tolist(), swapped_1byte) 7915 7916 storage_bool = storage.__copy__() 7917 storage_bool.byteswap(torch.bool) 7918 self.assertEqual(storage_bool.tolist(), swapped_1byte) 7919 7920 storage_c128 = storage.__copy__() 7921 storage_c128.byteswap(torch.complex128) 7922 self.assertEqual(storage_c128.tolist(), swapped_8bytes) 7923 7924 storage_c64 = storage.__copy__() 7925 storage_c64.byteswap(torch.complex64) 7926 self.assertEqual(storage_c64.tolist(), swapped_4bytes) 7927 7928 # Test that internal versions of functions related to TypedStorage do not 7929 # produce a deprecation warning 7930 def test_typed_storage_internal_no_warning(self): 7931 s0 = torch.FloatStorage(10) 7932 s0_untyped = s0.untyped() 7933 t0 = torch.randn(10) 7934 7935 funcs = [ 7936 lambda: torch.FloatStorage(_internal=True), 7937 lambda: torch.TypedStorage( 7938 dtype=torch.float, 7939 device='cpu', 7940 _internal=True), 7941 lambda: torch.TypedStorage( 7942 wrap_storage=s0_untyped, 7943 dtype=s0.dtype, 7944 _internal=True), 7945 lambda: torch.FloatStorage._dtype, 7946 lambda: s0._resize_(20), 7947 lambda: s0._size(), 7948 lambda: s0._untyped_storage, 7949 lambda: s0._is_shared(), 7950 lambda: s0._share_memory_(), 7951 lambda: s0._pickle_storage_type(), 7952 lambda: s0._setitem(slice(0, s0._size()), 1), 7953 lambda: s0._element_size(), 7954 lambda: s0._deepcopy({}), 7955 lambda: s0._data_ptr(), 7956 lambda: s0._nbytes(), 7957 lambda: t0._typed_storage(), 7958 ] 7959 7960 if torch.cuda.is_available(): 7961 s1 = torch.cuda.FloatStorage(10) 7962 s1_untyped = s1.untyped() 7963 t1 = torch.randn(10, device='cuda') 7964 7965 funcs += [ 7966 lambda: torch.cuda.FloatStorage(_internal=True), 7967 lambda: torch.TypedStorage( 7968 dtype=torch.float, 7969 device='cuda', 7970 _internal=True), 7971 lambda: torch.TypedStorage( 7972 wrap_storage=s1_untyped, 7973 dtype=s1.dtype, 7974 _internal=True), 7975 lambda: torch.cuda.FloatStorage._dtype, 7976 lambda: s1._resize_(20), 7977 lambda: s1._size(), 7978 lambda: s1._untyped_storage, 7979 lambda: s1._is_shared(), 7980 lambda: s1._share_memory_(), 7981 lambda: s1._pickle_storage_type(), 7982 lambda: s1._setitem(slice(0, s1._size()), 1), 7983 lambda: s1._element_size(), 7984 lambda: s1._deepcopy({}), 7985 lambda: s1._data_ptr(), 7986 lambda: s1._nbytes(), 7987 lambda: t1._typed_storage(), 7988 ] 7989 7990 # Check that each of the TypedStorage internal function calls do not 7991 # produce a deprecation warning 7992 for f in funcs: 7993 with warnings.catch_warnings(): 7994 warnings.filterwarnings('error', "TypedStorage is deprecated") 7995 f() 7996 7997 # Test that public functions related to TypedStorage produce a deprecation 7998 # warning 7999 @skipIfTorchInductor("FIXME") 8000 def test_typed_storage_deprecation_warning(self): 8001 s0 = torch.FloatStorage(10) 8002 funcs = [ 8003 lambda: torch.FloatStorage(), 8004 lambda: torch.FloatStorage.dtype, 8005 lambda: s0.fill_(0), 8006 lambda: s0.is_cuda, 8007 lambda: s0.untyped(), 8008 lambda: len(s0), 8009 lambda: s0[0], 8010 ] 8011 8012 if torch.cuda.is_available(): 8013 s1 = torch.cuda.FloatStorage(10) 8014 funcs += [ 8015 lambda: torch.cuda.FloatStorage(), 8016 lambda: torch.cuda.FloatStorage.dtype, 8017 lambda: s1.fill_(0), 8018 lambda: s1.is_cuda, 8019 lambda: s1.untyped(), 8020 lambda: len(s1), 8021 lambda: s1[0], 8022 ] 8023 8024 # Check that each of the TypedStorage function calls produce a warning 8025 # if warnings are reset 
between each call
        for f in funcs:
            with AlwaysWarnTypedStorageRemoval(True):
                with warnings.catch_warnings(record=True) as w:
                    warnings.resetwarnings()
                    f()
                    self.assertEqual(len(w), 1, msg=str([str(a) for a in w]))
                    warning = w[0].message
                    # NB: the original two-argument assertTrue here was a no-op
                    # (the second argument is only the failure message); assert
                    # that the recorded message is a warning instance instead.
                    self.assertIsInstance(warning, Warning)
                    self.assertTrue(re.search(
                        '^TypedStorage is deprecated',
                        str(warning)))

        # Test that only the first warning is raised by default
        torch.storage._reset_warn_typed_storage_removal()
        with warnings.catch_warnings(record=True) as w:
            warnings.resetwarnings()
            torch.FloatStorage()
            torch.randn(10).storage()
            self.assertEqual(len(w), 1, msg=str([str(a) for a in w]))
            warning = w[0].message
            self.assertTrue(re.search(
                '^TypedStorage is deprecated',
                str(warning)))
            # Check the line of code from the warning's stack
            with open(w[0].filename, encoding="utf-8") as f:
                code_line = f.readlines()[w[0].lineno - 1]
            self.assertTrue(re.search(re.escape('torch.FloatStorage()'), code_line))

        # Check that the warning is not emitted again once it has already fired
        with warnings.catch_warnings(record=True) as w:
            warnings.resetwarnings()
            torch.FloatStorage()
            torch.randn(10).storage()
            self.assertEqual(len(w), 0, msg=str([str(a) for a in w]))

    def test_from_file(self):
        def assert_with_filename(filename):
            size = 10000
            s1 = torch.FloatStorage.from_file(filename, True, size)
            t1 = torch.FloatTensor(s1).copy_(torch.randn(size))
            self.assertEqual(s1.data_ptr(), torch.FloatTensor(s1).data_ptr())

            # check mapping
            s2 = torch.FloatStorage.from_file(filename, True, size)
            t2 = torch.FloatTensor(s2)
            self.assertEqual(t1, t2, atol=0, rtol=0)

            # check changes to t1 from t2
            rnum = random.uniform(-1, 1)
            t1.fill_(rnum)
            self.assertEqual(t1, t2, atol=0, rtol=0)

            # check changes to t2 from t1
            rnum = random.uniform(-1, 1)
            t2.fill_(rnum)
            self.assertEqual(t1, t2, atol=0, rtol=0)

            # release the tensors
            del s1, t1, s2, t2

        with TemporaryFileName() as fname:
            assert_with_filename(fname)

        if IS_FILESYSTEM_UTF8_ENCODING:
            with TemporaryDirectoryName(suffix='\u4e2d\u6587') as dname, TemporaryFileName(dir=dname) as fname:
                assert_with_filename(fname)

    def test_torch_from_file(self):
        def assert_with_filename(filename):
            size = 10000
            s1 = torch.from_file(filename, True, size, dtype=torch.float)
            t1 = torch.FloatTensor(s1).copy_(torch.randn(size))

            # check mapping
            s2 = torch.from_file(filename, True, size, dtype=torch.float)
            t2 = torch.FloatTensor(s2)
            self.assertEqual(t1, t2, atol=0, rtol=0)

            # check changes to t1 from t2
            rnum = random.uniform(-1, 1)
            t1.fill_(rnum)
            self.assertEqual(t1, t2, atol=0, rtol=0)

            # check changes to t2 from t1
            rnum = random.uniform(-1, 1)
            t2.fill_(rnum)
            self.assertEqual(t1, t2, atol=0, rtol=0)

            # release the tensors
            del s1, t1, s2, t2

        with TemporaryFileName() as fname:
            assert_with_filename(fname)

        if IS_FILESYSTEM_UTF8_ENCODING:
            with TemporaryDirectoryName(suffix='\u4e2d\u6587') as dname, TemporaryFileName(dir=dname) as fname:
                assert_with_filename(fname)

    def test_print(self):
        default_type = torch.tensor([]).type()
        for t in torch._tensor_classes:
            if t == torch.HalfTensor:
                continue  #
HalfTensor does not support fill 8129 if t.is_sparse: 8130 continue 8131 if t.is_cuda and not torch.cuda.is_available(): 8132 continue 8133 obj = t(100, 100).fill_(1) 8134 obj.__repr__() 8135 str(obj) 8136 # test half tensor 8137 obj = torch.rand(100, 100, device='cpu').half() 8138 obj.__repr__() 8139 str(obj) 8140 for t in torch._storage_classes: 8141 if t == torch.BFloat16Storage: 8142 continue # Fix once fill is enabled for bfloat16 8143 if t.is_cuda and not torch.cuda.is_available(): 8144 continue 8145 if t == torch.BoolStorage or t == torch.cuda.BoolStorage: 8146 obj = t(100).fill_(True) 8147 else: 8148 obj = t(100).fill_(1) 8149 obj.__repr__() 8150 str(obj) 8151 8152 # test complex tensor 8153 # complex tensor print uses two formatters, one for real values 8154 # and the other for imag values. this is consistent with numpy 8155 x = torch.tensor([2.3 + 4j, 7 + 6j]) 8156 self.assertEqual(x.__repr__(), str(x)) 8157 self.assertExpectedInline(str(x), '''tensor([2.3000+4.j, 7.0000+6.j])''') 8158 8159 # test complex half tensor 8160 x = torch.tensor([1.25 + 4j, -7. + 6j], dtype=torch.chalf) 8161 self.assertEqual(x.__repr__(), str(x)) 8162 self.assertExpectedInline(str(x), '''tensor([ 1.2500+4.j, -7.0000+6.j], dtype=torch.complex32)''') 8163 8164 # test scientific notation for complex tensors 8165 x = torch.tensor([1e28 + 2j , -1e-28j]) 8166 self.assertEqual(x.__repr__(), str(x)) 8167 self.assertExpectedInline(str(x), '''tensor([1.0000e+28+2.0000e+00j, -0.0000e+00-1.0000e-28j])''') 8168 8169 # test big integer 8170 x = torch.tensor(2341234123412341) 8171 self.assertEqual(x.__repr__(), str(x)) 8172 self.assertExpectedInline(str(x), '''tensor(2341234123412341)''') 8173 8174 # test scientific notation 8175 x = torch.tensor([1e28, 1e-28]) 8176 self.assertEqual(x.__repr__(), str(x)) 8177 self.assertExpectedInline(str(x), '''tensor([1.0000e+28, 1.0000e-28])''') 8178 8179 # test scientific notation using set_printoptions 8180 x = torch.tensor([1e2, 1e-2]) 8181 torch.set_printoptions(sci_mode=True) 8182 self.assertEqual(x.__repr__(), str(x)) 8183 self.assertExpectedInline(str(x), '''tensor([1.0000e+02, 1.0000e-02])''') 8184 torch.set_printoptions(sci_mode=False) 8185 self.assertEqual(x.__repr__(), str(x)) 8186 self.assertExpectedInline(str(x), '''tensor([ 100.0000, 0.0100])''') 8187 torch.set_printoptions(sci_mode=None) # reset to the default value 8188 8189 # test no leading space if all elements positive 8190 x = torch.tensor([1, 2]) 8191 self.assertEqual(x.__repr__(), str(x)) 8192 self.assertExpectedInline(str(x), '''tensor([1, 2])''') 8193 8194 # test for leading space if there are negative elements 8195 x = torch.tensor([1, -2]) 8196 self.assertEqual(x.__repr__(), str(x)) 8197 self.assertExpectedInline(str(x), '''tensor([ 1, -2])''') 8198 8199 # test inf and nan 8200 x = torch.tensor([4, inf, 1.5, -inf, 0, nan, 1]) 8201 self.assertEqual(x.__repr__(), str(x)) 8202 self.assertExpectedInline(str(x), '''tensor([4.0000, inf, 1.5000, -inf, 0.0000, nan, 1.0000])''') 8203 8204 y = torch.tensor([4, inf, complex(1.5, inf), complex(-inf, 4), 0, complex(nan, inf), complex(3, nan)]) 8205 self.assertEqual(y.__repr__(), str(y)) 8206 expected_str = '''\ 8207tensor([4.0000+0.j, inf+0.j, 1.5000+infj, -inf+4.j, 0.0000+0.j, nan+infj, 8208 3.0000+nanj])''' 8209 self.assertExpectedInline(str(y), expected_str) 8210 8211 # test dtype 8212 with set_default_dtype(torch.float): 8213 x = torch.tensor([1e-324, 1e-323, 1e-322, 1e307, 1e308, 1e309], dtype=torch.float64) 8214 self.assertEqual(x.__repr__(), str(x)) 8215 
expected_str = '''\ 8216tensor([ 0.0000e+00, 9.8813e-324, 9.8813e-323, 1.0000e+307, 1.0000e+308, 8217 inf], dtype=torch.float64)''' 8218 self.assertExpectedInline(str(x), expected_str) 8219 8220 # test changing default dtype 8221 with set_default_dtype(torch.float64): 8222 self.assertEqual(x.__repr__(), str(x)) 8223 expected_str = '''\ 8224tensor([ 0.0000e+00, 9.8813e-324, 9.8813e-323, 1.0000e+307, 1.0000e+308, 8225 inf])''' 8226 self.assertExpectedInline(str(x), expected_str) 8227 8228 # test summary 8229 x = torch.zeros(10000) 8230 self.assertEqual(x.__repr__(), str(x)) 8231 self.assertExpectedInline(str(x), '''tensor([0., 0., 0., ..., 0., 0., 0.])''') 8232 8233 # test internal summary function 8234 x = torch.rand(1, 20, 5, 30) 8235 summary = torch._tensor_str.get_summarized_data(x) 8236 self.assertEqual(summary.shape, (1, 6, 5, 6)) 8237 first_and_last = [0, 1, 2, -3, -2, -1] 8238 self.assertEqual(summary, x[:, first_and_last][..., first_and_last]) 8239 8240 # test device 8241 if torch.cuda.is_available(): 8242 x = torch.tensor([123], device='cuda:0') 8243 self.assertEqual(x.__repr__(), str(x)) 8244 self.assertExpectedInline(str(x), '''tensor([123], device='cuda:0')''') 8245 8246 # test changing default to cuda 8247 torch.set_default_tensor_type(torch.cuda.FloatTensor) 8248 self.assertEqual(x.__repr__(), str(x)) 8249 self.assertExpectedInline(str(x), '''tensor([123])''') 8250 8251 # test printing a tensor on a different gpu than current one. 8252 if torch.cuda.device_count() >= 2: 8253 with torch.cuda.device(1): 8254 self.assertEqual(x.__repr__(), str(x)) 8255 self.assertExpectedInline(str(x), '''tensor([123], device='cuda:0')''') 8256 8257 # test printing cpu tensor when default device is cuda 8258 y = torch.tensor([123], device='cpu') 8259 self.assertEqual(y.__repr__(), str(y)) 8260 self.assertExpectedInline(str(y), '''tensor([123], device='cpu')''') 8261 torch.set_default_tensor_type(default_type) 8262 8263 8264 # test integral floats and requires_grad 8265 x = torch.tensor([123.], requires_grad=True) 8266 self.assertEqual(x.__repr__(), str(x)) 8267 self.assertExpectedInline(str(x), '''tensor([123.], requires_grad=True)''') 8268 8269 # test non-contiguous print 8270 # sliced tensor should have > PRINT_OPTS.threshold elements 8271 x = torch.ones(100, 2, 2, 10) 8272 y = x.as_strided(size=(100, 2, 10), stride=(2 * 2 * 10, 2 * 10, 1)) 8273 self.assertEqual(str(y), y.__repr__()) 8274 expected_str = '''\ 8275tensor([[[1., 1., 1., ..., 1., 1., 1.], 8276 [1., 1., 1., ..., 1., 1., 1.]], 8277 8278 [[1., 1., 1., ..., 1., 1., 1.], 8279 [1., 1., 1., ..., 1., 1., 1.]], 8280 8281 [[1., 1., 1., ..., 1., 1., 1.], 8282 [1., 1., 1., ..., 1., 1., 1.]], 8283 8284 ..., 8285 8286 [[1., 1., 1., ..., 1., 1., 1.], 8287 [1., 1., 1., ..., 1., 1., 1.]], 8288 8289 [[1., 1., 1., ..., 1., 1., 1.], 8290 [1., 1., 1., ..., 1., 1., 1.]], 8291 8292 [[1., 1., 1., ..., 1., 1., 1.], 8293 [1., 1., 1., ..., 1., 1., 1.]]])\ 8294''' 8295 8296 self.assertExpectedInline(str(y), expected_str) 8297 8298 x = torch.ones(100, 2, 2, 10) * (1 + 1j) 8299 y = x.as_strided(size=(100, 2, 10), stride=(2 * 2 * 10, 2 * 10, 1)) 8300 self.assertEqual(str(y), y.__repr__()) 8301 expected_str = '''\ 8302tensor([[[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j], 8303 [1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j]], 8304 8305 [[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j], 8306 [1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j]], 8307 8308 [[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j], 8309 [1.+1.j, 1.+1.j, 1.+1.j, 
..., 1.+1.j, 1.+1.j, 1.+1.j]], 8310 8311 ..., 8312 8313 [[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j], 8314 [1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j]], 8315 8316 [[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j], 8317 [1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j]], 8318 8319 [[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j], 8320 [1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j]]])\ 8321''' 8322 self.assertExpectedInline(str(y), expected_str) 8323 8324 # test print 0-dim tensor: there's no 0-dim in Numpy, we match arrayprint style 8325 x = torch.tensor(0.00002) 8326 self.assertEqual(x.__repr__(), str(x)) 8327 self.assertExpectedInline(str(x), '''tensor(2.0000e-05)''') 8328 8329 # test print boolean tensor 8330 x = torch.tensor([True]) 8331 self.assertEqual(x.__repr__(), str(x)) 8332 self.assertExpectedInline(str(x), '''tensor([True])''') 8333 8334 x = torch.tensor(True) 8335 self.assertEqual(x.__repr__(), str(x)) 8336 self.assertExpectedInline(str(x), '''tensor(True)''') 8337 8338 # [Numpy] test print float in sci_mode when min < 0.0001. 8339 x = torch.tensor([0.00002]) 8340 self.assertEqual(x.__repr__(), str(x)) 8341 self.assertExpectedInline(str(x), '''tensor([2.0000e-05])''') 8342 8343 # [Numpy] test print complex in sci_mode when real_min < 0.0001 and (or) imag_min < 0.0001. 8344 x = torch.tensor([0.00002]) * (1 + 1j) 8345 self.assertEqual(x.__repr__(), str(x)) 8346 self.assertExpectedInline(str(x), '''tensor([2.0000e-05+2.0000e-05j])''') 8347 8348 # [Numpy] test print float in sci_mode when max > 1e8. 8349 # TODO: Pytorch uses fixed precision to print, while Numpy uses dragon4_scientific 8350 # to do automatic trimming and padding. 8351 x = torch.tensor([123456789.]) 8352 self.assertEqual(x.__repr__(), str(x)) 8353 self.assertExpectedInline(str(x), '''tensor([1.2346e+08])''') 8354 8355 # [Numpy] test print float in sci_mode when max / min > 1000. 8356 x = torch.tensor([0.01, 11]) 8357 self.assertEqual(x.__repr__(), str(x)) 8358 self.assertExpectedInline(str(x), '''tensor([1.0000e-02, 1.1000e+01])''') 8359 8360 # [Numpy] test print int max / min > 1000, no sci_mode 8361 x = torch.tensor([1, 1010]) 8362 self.assertEqual(x.__repr__(), str(x)) 8363 self.assertExpectedInline(str(x), '''tensor([ 1, 1010])''') 8364 8365 # [Numpy] test print int > 1e8, no sci_mode 8366 x = torch.tensor([1000000000]) # 1e9 8367 self.assertEqual(x.__repr__(), str(x)) 8368 self.assertExpectedInline(str(x), '''tensor([1000000000])''') 8369 8370 # [Numpy] test printing float in int_mode 8371 x = torch.tensor([1., 1000.]) 8372 self.assertEqual(x.__repr__(), str(x)) 8373 self.assertExpectedInline(str(x), '''tensor([ 1., 1000.])''') 8374 8375 # [Numpy] test printing float in int_mode in sci format when max / min > 1000. 
8376 x = torch.tensor([1., 1010.]) 8377 self.assertEqual(x.__repr__(), str(x)) 8378 self.assertExpectedInline(str(x), '''tensor([1.0000e+00, 1.0100e+03])''') 8379 8380 def test_sizeof(self) -> None: 8381 sizeof_empty = torch.randn(0).storage().__sizeof__() 8382 sizeof_10 = torch.randn(10).storage().__sizeof__() 8383 sizeof_100 = torch.randn(100).storage().__sizeof__() 8384 self.assertEqual((sizeof_100 - sizeof_empty) // (sizeof_10 - sizeof_empty), 10) 8385 self.assertEqual((sizeof_100 - sizeof_empty) % (sizeof_10 - sizeof_empty), 0) 8386 8387 sizeof_empty = torch.randn(0).to(torch.uint8).storage().__sizeof__() 8388 sizeof_10 = torch.randn(10).to(torch.uint8).storage().__sizeof__() 8389 sizeof_100 = torch.randn(100).to(torch.uint8).storage().__sizeof__() 8390 self.assertEqual((sizeof_100 - sizeof_empty) // (sizeof_10 - sizeof_empty), 10) 8391 self.assertEqual((sizeof_100 - sizeof_empty) % (sizeof_10 - sizeof_empty), 0) 8392 8393 @skipIfTorchDynamo("Not a suitable test for TorchDynamo") 8394 def test_resizable(self) -> None: 8395 x = torch.randn(5) 8396 self.assertTrue(x.storage().resizable()) 8397 x.numpy() 8398 self.assertFalse(x.storage().resizable()) 8399 8400 def test_iter(self) -> None: 8401 x = torch.randn(5, 5) 8402 for i, sub in enumerate(x): 8403 self.assertEqual(sub, x[i]) # noqa: PLR1736 8404 8405 x = torch.tensor([]) 8406 self.assertEqual(list(x), []) 8407 8408 def test_new(self) -> None: 8409 x = torch.autograd.Variable(torch.tensor([])) 8410 y = torch.autograd.Variable(torch.randn(4, 4)) 8411 z = torch.autograd.Variable(torch.IntTensor([1, 2, 3])) 8412 self.assertEqual(x.new().shape, [0]) 8413 self.assertEqual(x.new(), x) 8414 self.assertEqual(x.new(1, 2).shape, [1, 2]) 8415 self.assertEqual(x.new(torch.Size([3, 4])).shape, [3, 4]) 8416 self.assertEqual(x.new([3, 4]).shape, [2]) 8417 self.assertEqual(x.new([3, 4]).tolist(), [3, 4]) 8418 self.assertEqual(x.new((3, 4)).tolist(), [3, 4]) 8419 self.assertEqual(x.new([np.int32(3), np.float64(4)]).tolist(), [3, 4]) 8420 self.assertEqual(x.new(np.array((3, 4))).tolist(), [3, 4]) 8421 self.assertEqual(x.new([z[2], z[0] + 3]).tolist(), [3, 4]) 8422 self.assertEqual(x.new(size=(3, 4)).shape, [3, 4]) 8423 self.assertEqual(x.new(()).shape, [0]) 8424 self.assertEqual(x.new(y.storage()).data_ptr(), y.data_ptr()) 8425 self.assertEqual(x.new(y).data_ptr(), y.data_ptr()) 8426 self.assertIsNot(x.new(y), y) 8427 8428 self.assertRaises(TypeError, lambda: x.new(z)) 8429 # TypeError would be better 8430 self.assertRaises(RuntimeError, lambda: x.new(z.storage())) 8431 8432 @unittest.skipIf(PYTORCH_CUDA_MEMCHECK, "is_pinned uses failure to detect pointer property") 8433 def test_pin_memory(self): 8434 x = torch.randn(3, 5) 8435 self.assertFalse(x.is_pinned()) 8436 if torch.cuda.is_available(): 8437 pinned = x.pin_memory() 8438 self.assertTrue(pinned.is_pinned()) 8439 self.assertEqual(pinned, x) 8440 self.assertNotEqual(pinned.data_ptr(), x.data_ptr()) 8441 # test that pin_memory on already pinned tensor has no effect 8442 self.assertIs(pinned, pinned.pin_memory()) 8443 self.assertEqual(pinned.data_ptr(), pinned.pin_memory().data_ptr()) 8444 8445 def test_error_msg_type_translation(self): 8446 with self.assertRaisesRegex( 8447 RuntimeError, 8448 # message includes both Double and Long 8449 '(?=.*Double)(?=.*Long)'): 8450 8451 # Calls model with a LongTensor input but DoubleTensor weights 8452 input = torch.zeros(1, 1, 1, 6, dtype=torch.long) 8453 weight = torch.nn.Parameter(torch.zeros(1, 1, 1, 3, dtype=torch.double)) 8454 model = torch.nn.Conv2d(1, 
1, (1, 3), stride=1, padding=0, bias=False) 8455 model.weight = weight 8456 out = model(input) 8457 8458 def test_apply(self): 8459 x = torch.arange(1, 6) 8460 res = x.clone().apply_(lambda k: k + k) 8461 self.assertEqual(res, x * 2) 8462 self.assertRaises(TypeError, lambda: x.apply_(lambda k: "str")) 8463 8464 def test_map(self): 8465 x = torch.autograd.Variable(torch.randn(3, 3)) 8466 y = torch.autograd.Variable(torch.randn(3)) 8467 res = x.clone() 8468 res.map_(y, lambda a, b: a + b) 8469 self.assertEqual(res, x + y) 8470 self.assertRaisesRegex(TypeError, "not callable", lambda: res.map_(y, "str")) 8471 8472 def test_map2(self): 8473 x = torch.autograd.Variable(torch.randn(3, 3)) 8474 y = torch.autograd.Variable(torch.randn(3)) 8475 z = torch.autograd.Variable(torch.randn(1, 3)) 8476 res = x.clone() 8477 res.map2_(y, z, lambda a, b, c: a + b * c) 8478 self.assertEqual(res, x + y * z) 8479 z.requires_grad = True 8480 self.assertRaisesRegex( 8481 RuntimeError, "requires grad", 8482 lambda: res.map2_(y, z, lambda a, b, c: a + b * c)) 8483 8484 def test_Size(self): 8485 x = torch.Size([1, 2, 3]) 8486 self.assertIsInstance(x, tuple) 8487 self.assertEqual(x[0], 1) 8488 self.assertEqual(x[1], 2) 8489 self.assertEqual(x[2], 3) 8490 self.assertEqual(len(x), 3) 8491 self.assertRaises(TypeError, lambda: torch.Size(torch.ones(3))) 8492 8493 self.assertIsInstance(x * 2, torch.Size) 8494 self.assertIsInstance(x[:-1], torch.Size) 8495 self.assertIsInstance(x + x, torch.Size) 8496 8497 def test_Size_scalar(self): 8498 three = torch.tensor(3) 8499 two = torch.tensor(2) 8500 x = torch.Size([0, 1, two, three, 4]) 8501 for i in range(1, 5): 8502 self.assertEqual(x[i], i) 8503 8504 def test_Size_iter(self): 8505 for sizes in [iter([1, 2, 3, 4, 5]), range(1, 6)]: 8506 x = torch.Size(sizes) 8507 for i in range(0, 5): 8508 self.assertEqual(x[i], i + 1) 8509 8510 def test_t_not_2d_error(self): 8511 self.assertRaises(RuntimeError, lambda: torch.randn(2, 3, 4).t()) 8512 self.assertRaises(RuntimeError, lambda: torch.randn(2, 3, 4).t_()) 8513 8514 # skip this test for now as it affects all tests 8515 @unittest.skipIf(True, "flush_denormal not supported") 8516 def test_set_flush_denormal(self): 8517 tiny_float = 1e-42 8518 tiny_double = 1e-320 8519 float_tensor = torch.FloatTensor([1.0, tiny_float]) 8520 double_tensor = torch.DoubleTensor([1.0, tiny_float, tiny_double]) 8521 8522 self.assertEqual(float_tensor[0], 1.0, atol=0.0, rtol=0) 8523 self.assertEqual(float_tensor[1], tiny_float, atol=tiny_float / 16, rtol=0) 8524 self.assertEqual(double_tensor[0], 1.0, atol=0.0, rtol=0) 8525 self.assertEqual(double_tensor[1], tiny_float, atol=0.0, rtol=0) 8526 self.assertEqual(double_tensor[2], tiny_double, atol=0.0, rtol=0) 8527 8528 torch.set_flush_denormal(True) 8529 self.assertEqual(float_tensor[0], 1.0, atol=0.0, rtol=0) 8530 self.assertEqual(float_tensor[1], 0.0, atol=0.0, rtol=0) # tiny_float to zero 8531 self.assertEqual(double_tensor[0], 1.0, atol=0.0, rtol=0) 8532 # tiny_float is not converted to zero in double type 8533 self.assertEqual(double_tensor[1], tiny_float, atol=0.0, rtol=0) 8534 self.assertEqual(double_tensor[2], 0.0, atol=0.0, rtol=0) # tiny_double to zero 8535 torch.set_flush_denormal(False) 8536 8537 def test_show_config(self): 8538 # We can't usefully test the output; just make sure this doesn't crash 8539 torch.__config__.show() 8540 8541 @unittest.skipIf(IS_FBCODE, "CXX_FLAGS is only for OSS build.") 8542 def test_cxx_flags(self): 8543 torch.__config__._cxx_flags() 8544 8545 def 
test_parallel_info(self): 8546 torch.__config__.parallel_info() 8547 8548 def test_get_cpu_capability(self): 8549 # This method is primarily exposed for torchvision's resize 8550 torch.backends.cpu.get_cpu_capability() 8551 8552 # We have to ensure that method is torchscriptable as torchvision's resize 8553 # should be torchscriptable 8554 torch.jit.script(torch.backends.cpu.get_cpu_capability) 8555 8556 @slowTest 8557 def test_slow_test(self): 8558 # Just a smoketest to make sure our slowTest decorator works. 8559 pass 8560 8561 def test_is_nonzero(self): 8562 with self.assertRaisesRegex(RuntimeError, "Boolean value of Tensor with no values is ambiguous"): 8563 torch.tensor([]).is_nonzero() 8564 with self.assertRaisesRegex(RuntimeError, "Boolean value of Tensor with more than one value is ambiguous"): 8565 torch.tensor([0, 0]).is_nonzero() 8566 self.assertFalse(torch.tensor(0).is_nonzero()) 8567 self.assertTrue(torch.tensor(1).is_nonzero()) 8568 self.assertFalse(torch.tensor([0]).is_nonzero()) 8569 self.assertTrue(torch.tensor([1]).is_nonzero()) 8570 self.assertFalse(torch.tensor([[0]]).is_nonzero()) 8571 self.assertTrue(torch.tensor([[1]]).is_nonzero()) 8572 self.assertTrue(torch.tensor(0.1).is_nonzero()) 8573 self.assertTrue(torch.tensor(-0.1).is_nonzero()) 8574 self.assertFalse(torch.tensor(0.0).is_nonzero()) 8575 self.assertTrue(torch.tensor(True).is_nonzero()) 8576 self.assertFalse(torch.tensor(False).is_nonzero()) 8577 self.assertFalse(torch.tensor(0 + 0j).is_nonzero()) 8578 self.assertTrue(torch.tensor(0 + 0.1j).is_nonzero()) 8579 8580 def test_assert_async(self): 8581 with self.assertRaisesRegex(RuntimeError, "Boolean value of Tensor with no values is ambiguous"): 8582 torch._assert_async(torch.tensor([])) 8583 with self.assertRaisesRegex(RuntimeError, "Boolean value of Tensor with more than one value is ambiguous"): 8584 torch._assert_async(torch.tensor([0, 0])) 8585 with self.assertRaisesRegex(RuntimeError, "Expected Tensor with single nonzero value, but got zero"): 8586 torch._assert_async(torch.tensor(0)) 8587 torch._assert_async(torch.tensor(1)) 8588 torch._assert_async(torch.tensor(0.1)) 8589 torch._assert_async(torch.tensor(-0.1)) 8590 with self.assertRaisesRegex(RuntimeError, "Expected Tensor with single nonzero value, but got zero"): 8591 torch._assert_async(torch.tensor(0.0)) 8592 torch._assert_async(torch.tensor(True)) 8593 with self.assertRaisesRegex(RuntimeError, "Expected Tensor with single nonzero value, but got zero"): 8594 torch._assert_async(torch.tensor(False)) 8595 torch._assert_async(torch.tensor(0 + 0.1j)) 8596 with self.assertRaisesRegex(RuntimeError, "Expected Tensor with single nonzero value, but got zero"): 8597 torch._assert_async(torch.tensor(0 + 0j)) 8598 8599 # NB: we must not be built with CUDA; if we are built with CUDA but no CUDA 8600 # is available, we get a different error. 
8601 @unittest.skipIf(torch.backends.cuda.is_built() or IS_SANDCASTLE, "CUDA is built, can't test CUDA not built error") 8602 def test_cuda_not_built(self): 8603 msg = "Torch not compiled with CUDA enabled" 8604 self.assertRaisesRegex(AssertionError, msg, lambda: torch.cuda.current_device()) 8605 self.assertRaisesRegex(AssertionError, msg, lambda: torch.tensor([1], device="cuda")) 8606 self.assertRaisesRegex(AssertionError, msg, lambda: torch.tensor([1]).cuda()) 8607 self.assertRaisesRegex(TypeError, msg, lambda: torch.cuda.FloatTensor()) 8608 self.assertRaisesRegex(TypeError, msg, lambda: torch.set_default_tensor_type(torch.cuda.FloatTensor)) 8609 self.assertRaisesRegex(AssertionError, msg, lambda: torch.tensor([1]).to(device="cuda")) 8610 8611 def test_has_internal_overlap(self): 8612 OVERLAP_NO = 0 8613 OVERLAP_YES = 1 8614 OVERLAP_TOO_HARD = 2 8615 8616 # Check for contiguous tensors 8617 a = torch.randn(3, 3) 8618 self.assertEqual(torch._debug_has_internal_overlap(a), OVERLAP_NO) 8619 8620 # Checks for zero strides 8621 b = torch.randn(1, 3) 8622 b_expanded = b.expand(4, 3) 8623 self.assertEqual(torch._debug_has_internal_overlap(b_expanded), OVERLAP_YES) 8624 8625 # Check for zero strided, size 1 axis, in non-contiguous storage (gh-33812) 8626 c = torch.randn(10).as_strided([2, 1, 5], [1, 0, 2]) 8627 self.assertEqual(torch._debug_has_internal_overlap(c), OVERLAP_NO) 8628 c = torch.randn(2, 1, 10)[::2].as_strided((2, 1, 5), (10, 0, 2)) 8629 self.assertEqual(torch._debug_has_internal_overlap(c), OVERLAP_TOO_HARD) 8630 8631 def test_allow_tensor_metadata_change(self): 8632 a = torch.ones(2, 3) 8633 # Metadata changes are allowed on view tensors that are created from detach(). 8634 8635 def test_memory_format(self): 8636 def test_helper(x, memory_format): 8637 y = x.contiguous(memory_format=memory_format) 8638 self.assertFalse(y.is_contiguous()) 8639 self.assertTrue(y.is_contiguous(memory_format=memory_format)) 8640 self.assertEqual(y, x) 8641 8642 test_helper(torch.randn(4, 3, 8, 8), torch.channels_last) 8643 test_helper(torch.randn(4, 3, 8, 8, 8), torch.channels_last_3d) 8644 8645 def test_memory_format_contiguous_returns_same_tensor_if_already_satisfies(self): 8646 def test_helper(x, memory_format): 8647 alias = x.contiguous(memory_format=memory_format) 8648 alias.fill_(7) 8649 self.assertEqual(x, alias) 8650 8651 test_helper(torch.randn(4, 8, 8, 3).permute(0, 3, 1, 2), torch.channels_last) 8652 test_helper(torch.randn(4, 8, 8, 8, 3).permute(0, 4, 1, 2, 3), torch.channels_last_3d) 8653 8654 def test_memory_format_empty(self): 8655 def test_helper(dim1, dim2, memory_format): 8656 with self.assertRaises(RuntimeError): 8657 x = torch.empty(dim1, memory_format=memory_format) 8658 x = torch.empty(dim2, memory_format=memory_format) 8659 self.assertTrue(x.is_contiguous(memory_format=memory_format)) 8660 8661 test_helper((3, 3), (3, 3, 3, 3), torch.channels_last) 8662 test_helper((3, 3, 3), (3, 3, 3, 3, 3), torch.channels_last_3d) 8663 8664 def test_dim_order(self): 8665 shape = (2, 3, 5, 7) 8666 8667 t = torch.empty(shape) 8668 self.assertSequenceEqual(t.dim_order(), (0, 1, 2, 3), seq_type=tuple) 8669 # transpose doesn't really change the underlying physical memory 8670 # so expecting dim_order change to reflect that (like strides) 8671 self.assertSequenceEqual(t.transpose(0, 1).dim_order(), (1, 0, 2, 3)) 8672 8673 t = torch.empty(shape, memory_format=torch.channels_last) 8674 self.assertSequenceEqual(t.dim_order(), (0, 2, 3, 1)) 8675 8676 t = torch.empty((2, 3, 5, 7, 8), 
memory_format=torch.channels_last_3d) 8677 self.assertSequenceEqual(t.dim_order(), (0, 2, 3, 4, 1)) 8678 8679 for dim_order in itertools.permutations(range(4)): 8680 self.assertSequenceEqual( 8681 dim_order, torch.empty_permuted(shape, dim_order).dim_order() 8682 ) 8683 8684 for shape in [(2, 2, 2, 2), (2, 1, 2, 2), (2, 2, 1, 2), (2, 2, 2, 1), (2, 2, 1, 1), (2, 1, 1, 2)]: 8685 for memory_format in (torch.contiguous_format, torch.channels_last): 8686 t = torch.empty(shape).to(memory_format=memory_format) 8687 if memory_format == torch.contiguous_format: 8688 dim_order_target = list(range(len(shape))) 8689 elif memory_format == torch.channels_last: 8690 dim_order_target = [0, *list(range(2, len(shape))), 1] 8691 8692 self.assertSequenceEqual(dim_order_target, t.dim_order()) 8693 8694 def test_subclass_tensors(self): 8695 # raise an error when trying to subclass FloatTensor 8696 with self.assertRaisesRegex(TypeError, "type 'torch.FloatTensor' is not an acceptable base type"): 8697 class Foo1(torch.FloatTensor): 8698 pass 8699 8700 # but allow subclassing Tensor: 8701 class Foo2(torch.Tensor): 8702 def foo(self): 8703 return 5 8704 f = Foo2() 8705 self.assertEqual(f.foo(), 5) 8706 8707 def test_ndim(self): 8708 a = torch.randn(1, 2, 3) 8709 self.assertEqual(3, a.ndim) 8710 b = torch.randn(()) 8711 self.assertEqual(0, b.ndim) 8712 c = torch.randn(1, 0) 8713 self.assertEqual(2, c.ndim) 8714 8715 def test_nbytes(self): 8716 a = torch.randn(1, 2, 3, dtype=torch.float64) 8717 self.assertEqual(a.numel() * a.element_size(), a.nbytes) 8718 b = torch.randn(()) 8719 self.assertEqual(b.numel() * b.element_size(), b.nbytes) 8720 c = torch.randn(1, 0) 8721 self.assertEqual(c.numel() * c.element_size(), c.nbytes) 8722 8723 def test_fill_diagonal(self): 8724 a1 = torch.randn(7, 3) 8725 a2 = a1.clone() 8726 v = 1 8727 for i in range(3): 8728 a2[i][i] = v 8729 a1.fill_diagonal_(v) 8730 self.assertEqual(a1, a2) 8731 8732 b1 = torch.randn(7, 3) 8733 b2 = b1.clone() 8734 for i in range(3): 8735 b2[i][i] = v 8736 b2[i + 4][i] = v 8737 b1.fill_diagonal_(v, wrap=True) 8738 self.assertEqual(b1, b2) 8739 8740 c1 = torch.rand(3, 3, 3) 8741 c2 = c1.clone() 8742 for i in range(3): 8743 c2[i][i][i] = v 8744 c1.fill_diagonal_(v) 8745 self.assertEqual(c1, c2) 8746 8747 # non-contiguous tensor 8748 d1 = torch.rand(3, 3, 3)[:, 1, ...] 8749 d2 = d1.clone() 8750 for i in range(3): 8751 d2[i][i] = v 8752 d1.fill_diagonal_(v) 8753 self.assertEqual(d1, d2) 8754 8755 e1 = torch.rand(7, 3, 3)[:, 1, ...] 
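        # same check as above, but exercising wrap=True on a non-contiguous slice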
8756 e2 = e1.clone() 8757 for i in range(3): 8758 e2[i][i] = v 8759 e2[i + 4][i] = v 8760 e1.fill_diagonal_(v, wrap=True) 8761 self.assertEqual(e1, e2) 8762 8763 def test_setting_real_imag_to_a_number(self): 8764 x = torch.randn(4, dtype=torch.cfloat) 8765 x.real = 0 8766 x.imag = 0 8767 zeros = torch.zeros(4) 8768 self.assertEqual(x.real, zeros) 8769 self.assertEqual(x.imag, zeros) 8770 8771 def test_batch_norm_cpu_inference(self): 8772 # input nchw in (2,1,1,1), (2,2,2,2) 8773 inputs = [ 8774 torch.tensor([[[[-0.5000]]], [[[0.5000]]]]), 8775 torch.tensor([ 8776 [ 8777 [[-0.5000, 0.5000], [-1.0000, 1.0000]], 8778 [[-0.2500, -0.5000], [0.2500, 0.5000]] 8779 ], 8780 [ 8781 [[0.1000, 1.0000], [1.0000, 0.1000]], 8782 [[1.0000, 0.5000], [1.5000, -1.5000]] 8783 ]])] 8784 # output nchw in (2,1,1,1), (2,2,2,2) 8785 outputs = [ 8786 torch.tensor([ 8787 [[[-0.499997496604919433593750000]]], 8788 [[[0.499997496604919433593750000]]]]), 8789 torch.tensor([ 8790 [[[-0.499997496604919433593750000, 0.499997496604919433593750000], 8791 [-0.999994993209838867187500000, 0.999994993209838867187500000]], 8792 [[-0.249998748302459716796875000, -0.499997496604919433593750000], 8793 [0.249998748302459716796875000, 0.499997496604919433593750000]]], 8794 [[[0.099999502301216125488281250, 0.999994993209838867187500000], 8795 [0.999994993209838867187500000, 0.099999502301216125488281250]], 8796 [[0.999994993209838867187500000, 0.499997496604919433593750000], 8797 [1.499992489814758300781250000, -1.499992489814758300781250000]]]])] 8798 8799 8800 for i in range(len(inputs)): 8801 for affine in [False, True]: 8802 m = torch.nn.BatchNorm2d(inputs[i].size()[1], 1e-05, 0.1, affine=affine) 8803 m.eval() 8804 # contiguous case 8805 input1 = inputs[i].contiguous() 8806 output1 = m(input1) 8807 # non-contiguous case 8808 input2 = input1.permute(0, 1, 3, 2) 8809 output2 = m(input2).permute(0, 1, 3, 2) 8810 # channels last case 8811 input3 = input1.contiguous(memory_format=torch.channels_last) 8812 output3 = m(input3) 8813 self.assertEqual(output3, outputs[i]) 8814 self.assertEqual(output3, output1) 8815 self.assertEqual(output3, output2) 8816 8817 # FIXME: move these meta tests to their own test suite/class or 8818 # distribute them among the appropriate test suites for their ops 8819 @skipIfTorchDynamo("Fails after Triton update, see https://github.com/pytorch/pytorch/issues/94687") 8820 def test_empty_meta(self): 8821 x = torch.empty(2 ** 20, 2 ** 20, device='meta') 8822 y = torch.empty(2 ** 20, device='meta') 8823 z = x + y 8824 self.assertEqual(z.size(), (2 ** 20, 2 ** 20)) 8825 self.assertRaises(RuntimeError, lambda: z[0][0].item()) 8826 8827 @skipIfTorchDynamo("Fails after Triton update, see https://github.com/pytorch/pytorch/issues/94687") 8828 def test_format_scalar_meta(self): 8829 x = torch.empty((), device='meta') 8830 self.assertEqual(format(x), repr(x)) 8831 8832 def test_upsample_nearest1d_meta(self): 8833 # TODO: this test should be triggered by test_nn.py but right 8834 # now meta is not enabled (and even if it was, we are probably 8835 # missing too many meta functions to get through the test unmolested) 8836 8837 # NB: Can't make the exponent too big, or it will overflow 8838 # signed 64-bit integer 8839 x = torch.empty(2 * 10 ** 8, 3, 2 * 10 ** 8, device='meta') 8840 z = torch.nn.functional.interpolate(x, scale_factor=2) 8841 self.assertEqual(z.size(), (2 * 10 ** 8, 3, 4 * 10 ** 8)) 8842 self.assertRaises(RuntimeError, lambda: z[0][0][0].item()) 8843 8844 # TODO: the out tests cannot be triggered by 
test_nn.py because 8845 # we don't actually do out= arguments for nn functions, so there 8846 # is no public API by which to get the out version 8847 8848 # interpolate doesn't seem to support out= 8849 # (not sure why passing None here doesn't work? How strange...) 8850 z = torch.empty(0, device='meta') 8851 torch._C._nn.upsample_nearest1d(x, (4 * 10 ** 8,), 2, out=z) 8852 self.assertEqual(z.size(), (2 * 10 ** 8, 3, 4 * 10 ** 8)) 8853 self.assertRaises(RuntimeError, lambda: z[0][0][0].item()) 8854 8855 def test_upsample_nearest2d_meta(self): 8856 # TODO: the out tests cannot be triggered by test_nn.py because 8857 # we don't actually do out= arguments for nn functions, so there 8858 # is no public API by which to get the out version 8859 8860 # Make sure we don't clobber strides of out tensor. NB: this 8861 # test must be done on 2d/3d, because 1d doesn't have any meaningful 8862 # layout support 8863 x = torch.empty(4, 3, 8, 8, device='meta') 8864 out = torch.empty(4, 3, 16, 16, device='meta', memory_format=torch.channels_last) 8865 torch._C._nn.upsample_nearest2d(x, (16, 16), out=out) 8866 self.assertTrue(out.is_contiguous(memory_format=torch.channels_last)) 8867 8868 x = torch.empty(4, 3, 8, 8, device='meta', memory_format=torch.channels_last) 8869 out = torch.empty(4, 3, 16, 16, device='meta') 8870 torch._C._nn.upsample_nearest2d(x, (16, 16), out=out) 8871 self.assertTrue(out.is_contiguous()) 8872 8873 # But if resize occurs, do clobber 8874 x = torch.empty(4, 3, 8, 8, device='meta', memory_format=torch.channels_last) 8875 out = torch.empty(0, device='meta') 8876 torch._C._nn.upsample_nearest2d(x, (16, 16), out=out) 8877 self.assertTrue(out.is_contiguous(memory_format=torch.channels_last)) 8878 8879 # Complain if out dtype mismatch 8880 x = torch.empty(4, 3, 8, 8, device='meta', dtype=torch.float) 8881 out = torch.empty(4, 3, 16, 16, device='meta', dtype=torch.double) 8882 self.assertExpectedRaisesInline( 8883 RuntimeError, lambda: torch._C._nn.upsample_nearest2d(x, (16, 16), out=out), 8884 """Expected out tensor to have dtype torch.float32 but got torch.float64 instead""" 8885 ) 8886 8887 # Complain if out device mismatch 8888 x = torch.empty(0, 3, 8, 8, device='meta') 8889 out = torch.empty(0, 3, 16, 16, device='cpu') 8890 # FIXME: compiling should properly error with a device mismatch. 
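        # (Under inductor, functionalization can rewrite the out= copy into a
        #  traced copy that does not raise, so the eager cross-device error is
        #  only asserted for non-inductor runs; compare the similar note on
        #  test_copy_many_to_one below.)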
8891 if not TEST_WITH_TORCHINDUCTOR: 8892 self.assertExpectedRaisesInline( 8893 RuntimeError, lambda: torch._C._nn.upsample_nearest2d(x, (16, 16), out=out), 8894 """Attempting to copy from device meta to device cpu, but cross-device copies are not allowed!""" 8895 ) 8896 8897 def test_add_meta_scalar(self): 8898 # From https://github.com/pytorch/pytorch/issues/53815 8899 x = torch.empty(2, device='meta') 8900 y = x + 2 8901 self.assertEqual(y.size(), x.size()) 8902 8903 def test_normal_shape(self): 8904 warned = False 8905 for device in get_all_device_types(): 8906 tensor1 = torch.rand(1, device=device) 8907 tensor4 = torch.rand(4, device=device) 8908 tensor120 = torch.rand(120, device=device) 8909 tensor2145 = torch.rand(2, 1, 4, 5, device=device) 8910 tensor2345 = torch.rand(2, 3, 4, 5, device=device) 8911 tensor2345_non_contiguous = torch.rand(2, 4, 3, 5, device=device).permute(0, 2, 1, 3) 8912 tensor2345_channels_last = tensor2345.contiguous(memory_format=torch.channels_last) 8913 output2345 = torch.zeros(2, 3, 4, 5, device=device) 8914 output345 = torch.zeros(3, 4, 5, device=device) 8915 8916 # inputs have same size 8917 self.assertEqual(torch.normal(tensor2345, tensor2345).size(), (2, 3, 4, 5)) 8918 self.assertEqual(torch.normal(tensor2345_non_contiguous, tensor2345).size(), (2, 3, 4, 5)) 8919 self.assertEqual(torch.normal(tensor2345, tensor2345_channels_last).size(), (2, 3, 4, 5)) 8920 self.assertEqual(torch.normal(tensor2345_non_contiguous, tensor2345_channels_last).size(), (2, 3, 4, 5)) 8921 8922 # scalar case 8923 self.assertEqual(torch.normal(tensor2345, 2).size(), (2, 3, 4, 5)) 8924 self.assertEqual(torch.normal(2, tensor2345).size(), (2, 3, 4, 5)) 8925 8926 # inputs are expandable tensors 8927 self.assertEqual(torch.normal(tensor2345, tensor1).size(), (2, 3, 4, 5)) 8928 self.assertEqual(torch.normal(tensor2145, tensor2345).size(), (2, 3, 4, 5)) 8929 8930 # inputs are non-expandable tensors, but they have same number of elements 8931 with self.assertRaisesRegex( 8932 RuntimeError, 8933 r"The size of tensor a \(120\) must match the size of " 8934 r"tensor b \(5\) at non-singleton dimension 3"): 8935 self.assertEqual(torch.normal(tensor120, tensor2345).size(), (120,)) 8936 with self.assertRaisesRegex( 8937 RuntimeError, 8938 r"The size of tensor a \(5\) must match the size of " 8939 r"tensor b \(120\) at non-singleton dimension 3"): 8940 self.assertEqual(torch.normal(tensor2345, tensor120).size(), (2, 3, 4, 5)) 8941 8942 # inputs are non-expandable tensors and they don't have same number of elements 8943 with self.assertRaisesRegex( 8944 RuntimeError, 8945 r"The size of tensor a \(5\) must match the size of " 8946 r"tensor b \(4\) at non-singleton dimension 3"): 8947 torch.normal(tensor2345, tensor4) 8948 8949 # output and inputs are size compatible 8950 self.assertEqual(torch.normal(tensor2345, tensor2345, out=output2345).size(), (2, 3, 4, 5)) 8951 8952 # output and inputs are not size compatible 8953 with self.assertWarnsRegex( 8954 UserWarning, 8955 "This behavior is deprecated, and in a future PyTorch " 8956 "release outputs will not be resized unless they have " 8957 "zero elements"): 8958 self.assertEqual(torch.normal(tensor2345, tensor2145, out=output345).size(), (2, 3, 4, 5)) 8959 with self.assertRaisesRegex( 8960 RuntimeError, 8961 r"The size of tensor a \(5\) must match the size of " 8962 r"tensor b \(120\) at non-singleton dimension 3"): 8963 # inputs are not expandable, output size is not the same as mean 8964 torch.normal(tensor2345, tensor120, out=output345) 8965 
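    # A quick illustration of the broadcasting rule test_normal_shape relies
    # on, shown directly on shapes. This is a minimal sketch; the helper name
    # below is ours, not part of the original suite. Sizes are aligned from
    # the trailing dimension, and each pair must either match or contain a 1.
    def _normal_broadcast_shapes_sketch(self):
        # (2, 1, 4, 5) with (2, 3, 4, 5) -> (2, 3, 4, 5): the singleton expands
        self.assertEqual(torch.broadcast_shapes((2, 1, 4, 5), (2, 3, 4, 5)), (2, 3, 4, 5))
        # (120,) vs (2, 3, 4, 5) fails: 120 != 5 at the trailing dimension,
        # matching the size-mismatch errors asserted in test_normal_shape
        with self.assertRaises(RuntimeError):
            torch.broadcast_shapes((120,), (2, 3, 4, 5))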
    def test_tensoriterator_output_setup(self):
        # Test whether the output's memory layout is correct
        def test_memory_layout(x, y, scale, zero_point, out):
            self.assertEqual(x.dim(), 4)
            self.assertEqual(x.size(), y.size())
            self.assertEqual(y.size(), out.size())

            shape = x.size()
            for n in range(shape[0]):
                for c in range(shape[1]):
                    for h in range(shape[2]):
                        for w in range(shape[3]):
                            if scale is not None and zero_point is not None:
                                self.assertEqual(
                                    out[n][c][h][w],
                                    torch.ops.quantized.add(x[n][c][h][w], y[n][c][h][w], scale, zero_point))
                            else:
                                self.assertEqual(out[n][c][h][w], x[n][c][h][w] + y[n][c][h][w])

        xraw = torch.rand(2, 3, 4, 4)
        yraw = torch.rand(2, 3, 4, 4)
        qxraw = torch.quantize_per_tensor(xraw, 0.1, 5, torch.quint8)
        qyraw = torch.quantize_per_tensor(yraw, 0.1, 5, torch.quint8)

        # contiguous case fast setup
        test_memory_layout(xraw, yraw, None, None, xraw + yraw)
        test_memory_layout(qxraw, qyraw, 0.1, 5, torch.ops.quantized.add(qxraw, qyraw, 0.1, 5))

        # channels last case fast setup
        x = xraw.contiguous(memory_format=torch.channels_last)
        y = yraw.contiguous(memory_format=torch.channels_last)
        test_memory_layout(x, y, None, None, x + y)
        qx = qxraw.contiguous(memory_format=torch.channels_last)
        qy = qyraw.contiguous(memory_format=torch.channels_last)
        test_memory_layout(qx, qy, 0.1, 5, torch.ops.quantized.add(qx, qy, 0.1, 5))

        # non contiguous case fast setup (dense, non-overlapping, same shape and strides)
        x = xraw.permute(0, 2, 3, 1)
        y = yraw.permute(0, 2, 3, 1)
        test_memory_layout(x, y, None, None, x + y)
        qx = qxraw.permute(0, 2, 3, 1)
        qy = qyraw.permute(0, 2, 3, 1)
        test_memory_layout(qx, qy, 0.1, 5, torch.ops.quantized.add(qx, qy, 0.1, 5))

        # non contiguous case fast setup (dense, non-overlapping):
        # the input tensors have the same shape and strides, the output tensor
        # has the same shape as the inputs but a different stride, and the
        # output should preserve its strides in this case
        x = xraw.permute(0, 2, 3, 1)
        y = yraw.permute(0, 2, 3, 1)
        out = torch.empty_like(xraw)
        out = out.permute(0, 3, 2, 1)
        expected_stride = out.stride()
        test_memory_layout(x, y, None, None, torch.add(x, y, out=out))
        self.assertEqual(expected_stride, out.stride())

        # non contiguous case non fast setup
        x = xraw.permute(0, 2, 3, 1)
        y = yraw.permute(0, 3, 2, 1)
        test_memory_layout(x, y, None, None, x + y)
        qx = qxraw.permute(0, 2, 3, 1)
        qy = qyraw.permute(0, 3, 2, 1)
        test_memory_layout(qx, qy, 0.1, 5, torch.ops.quantized.add(qx, qy, 0.1, 5))

    # Tests to make sure we still handle .data properly until it is removed
    def test_dot_data_use(self):
        # .data allows changing a Tensor's type in place; check that we still
        # raise a nice error.
        with self.assertRaisesRegex(
                RuntimeError,
                # message includes both Double and ComplexFloat
                '(?=.*Double)(?=.*ComplexFloat)'):

            # Call the model with double inputs but complex64 weights, so the
            # error message mentions both dtypes
            input = torch.randn(1, 1, 1, 6, dtype=torch.double)
            weight = torch.zeros(1, 1, 1, 3, dtype=torch.complex64)
            model = torch.nn.Conv2d(1, 1, (1, 3), stride=1, padding=0, bias=False)
            model.weight.data = weight
            out = model(input)

    def test_empty_storage_view(self):
        # we should be able to "modify" slices of a 0-element
        # array without an error being raised due to
        # trying to resize its storage
        t = torch.from_numpy(np.empty((0, 4)))
        t[:, 1::2] *= 1

    def test_has_storage(self):
        self.assertIsNotNone(torch.tensor([]).storage())
        self.assertIsNotNone(torch.empty(0).storage())
        self.assertIsNotNone(torch.tensor([]).clone().storage())
        self.assertIsNotNone(torch.tensor([0, 0, 0]).nonzero().storage())
        self.assertIsNotNone(torch.tensor([]).new().storage())

    # FIXME: Extend this test and put in a TensorProperties test class
    def test_numel(self):
        b = torch.ByteTensor(3, 100, 100)
        self.assertEqual(b.nelement(), 3 * 100 * 100)
        self.assertEqual(b.numel(), 3 * 100 * 100)

    # Verifies that (deep)copies of dtypes are the same objects
    def test_copy_dtypes(self):
        for dtype in all_types_and_complex_and(torch.half, torch.bfloat16, torch.bool):
            copied_dtype = copy.deepcopy(dtype)
            self.assertIs(dtype, copied_dtype)

    def test_dtype_is_signed(self):
        # (torch.half was listed twice here; torch.bool restores the intended
        # coverage, mirroring test_copy_dtypes above)
        for dtype in all_types_and_complex_and(torch.half, torch.bfloat16, torch.bool):
            self.assertEqual(dtype.is_signed, torch.is_signed(torch.tensor(0, dtype=dtype)))

        self.assertRaisesRegex(RuntimeError, 'not supported for quantized', lambda: torch.quint8.is_signed)
        self.assertRaisesRegex(RuntimeError, 'not supported for quantized', lambda: torch.qint8.is_signed)
        self.assertRaisesRegex(RuntimeError, 'not supported for quantized', lambda: torch.qint32.is_signed)

    # FIXME: Put the following random tests into their own test class or test suite
    @skipIfTorchDynamo("requires https://github.com/pytorch/torchdynamo/pull/1098")
    def test_RNGState(self):
        state = torch.get_rng_state()
        stateCloned = state.clone()
        before = torch.rand(1000)

        self.assertEqual(state.ne(stateCloned).long().sum(), 0, atol=0, rtol=0)

        torch.set_rng_state(state)
        after = torch.rand(1000)
        self.assertEqual(before, after, atol=0, rtol=0)

    @skipIfTorchDynamo("requires https://github.com/pytorch/torchdynamo/pull/1098")
    def test_RNGStateAliasing(self):
        # Fork the random number stream at this point
        gen = torch.Generator()
        gen.set_state(torch.get_rng_state())
        self.assertEqual(gen.get_state(), torch.get_rng_state())

        target_value = torch.rand(1000)
        # Dramatically alter the internal state of the main generator
        _ = torch.rand(100000)
        forked_value = torch.rand(1000, generator=gen)
        self.assertEqual(target_value, forked_value, atol=0, rtol=0, msg="RNG has not forked correctly.")

    @skipIfTorchDynamo("requires https://github.com/pytorch/torchdynamo/pull/1098")
    def test_RNG_after_pickle(self):
        torch.random.manual_seed(100)
        before = torch.rand(10)

        torch.random.manual_seed(100)
        buf = io.BytesIO()
        tensor = torch.tensor([1, 2, 3])
        ForkingPickler(buf,
pickle.HIGHEST_PROTOCOL).dump(tensor) 9115 after = torch.rand(10) 9116 9117 self.assertEqual(before, after, atol=0, rtol=0) 9118 9119 @skipIfTorchDynamo("requires https://github.com/pytorch/torchdynamo/pull/1098") 9120 def test_boxMullerState(self): 9121 torch.manual_seed(123) 9122 odd_number = 101 9123 seeded = torch.randn(odd_number) 9124 state = torch.get_rng_state() 9125 midstream = torch.randn(odd_number) 9126 torch.set_rng_state(state) 9127 repeat_midstream = torch.randn(odd_number) 9128 torch.manual_seed(123) 9129 reseeded = torch.randn(odd_number) 9130 self.assertEqual(midstream, repeat_midstream, atol=0, rtol=0, 9131 msg='get_rng_state/set_rng_state not generating same sequence of normally distributed numbers') 9132 self.assertEqual(seeded, reseeded, atol=0, rtol=0, 9133 msg='repeated calls to manual_seed not generating same sequence of normally distributed numbers') 9134 9135 @skipIfTorchDynamo("requires https://github.com/pytorch/torchdynamo/pull/1098") 9136 def test_manual_seed(self): 9137 rng_state = torch.get_rng_state() 9138 torch.manual_seed(2) 9139 x = torch.randn(100) 9140 self.assertEqual(torch.initial_seed(), 2) 9141 torch.manual_seed(2) 9142 y = torch.randn(100) 9143 self.assertEqual(x, y) 9144 9145 max_int64 = 0x7fff_ffff_ffff_ffff 9146 min_int64 = -max_int64 - 1 9147 max_uint64 = 0xffff_ffff_ffff_ffff 9148 # Check all boundary cases of valid seed value inputs 9149 test_cases = [ 9150 # (seed, expected_initial_seed) 9151 # Positive seeds should be unchanged 9152 (max_int64, max_int64), 9153 (max_int64 + 1, max_int64 + 1), 9154 (max_uint64, max_uint64), 9155 (0, 0), 9156 # Negative seeds wrap around starting from the largest seed value 9157 (-1, max_uint64), 9158 (min_int64, max_int64 + 1) 9159 ] 9160 for seed, expected_initial_seed in test_cases: 9161 torch.manual_seed(seed) 9162 actual_initial_seed = torch.initial_seed() 9163 msg = (f"expected initial_seed() = {expected_initial_seed:x} " 9164 f"after calling manual_seed({seed:x}), but got {actual_initial_seed:x} instead") 9165 self.assertEqual(expected_initial_seed, actual_initial_seed, msg=msg) 9166 for invalid_seed in [min_int64 - 1, max_uint64 + 1]: 9167 with self.assertRaisesRegex(RuntimeError, r'Overflow when unpacking long'): 9168 torch.manual_seed(invalid_seed) 9169 9170 torch.set_rng_state(rng_state) 9171 9172 # FIXME: Describe this test and port to the generic device framework in a more 9173 # appropriate test suite for the copy operation 9174 def test_copy_transpose(self): 9175 x = torch.arange(100 * 100, dtype=torch.float).reshape(100, 100).t() 9176 y = torch.empty(100, 100, dtype=torch.float) 9177 y.copy_(x) 9178 self.assertEqual(y[:, 0], range(100)) 9179 self.assertEqual(y[:, 40], range(4000, 4100)) 9180 9181 y = torch.empty(100, 100, dtype=torch.double) 9182 y.copy_(x) 9183 self.assertEqual(y[:, 0], range(100)) 9184 self.assertEqual(y[:, 40], range(4000, 4100)) 9185 9186 # Validates regression reported in https://github.com/pytorch/pytorch/issues/45269 9187 x = torch.arange(100 * 100).reshape(100, 100).to(dtype=torch.cfloat).t() 9188 y = torch.empty(100, 100, dtype=torch.cfloat) 9189 y.copy_(x) 9190 self.assertEqual(y[:, 0], range(100)) 9191 self.assertEqual(y[:, 40], range(4000, 4100)) 9192 9193 x = torch.arange(100 * 100).reshape(100, 100).to(dtype=torch.complex32).t() 9194 y = torch.empty(100, 100, dtype=torch.complex32) 9195 y.copy_(x) 9196 self.assertEqual(y[:, 0], range(100)) 9197 self.assertEqual(y[:, 40], range(4000, 4100)) 9198 9199 # FIXME: Port to a more appropriate test suite 9200 def 
test_copy_broadcast(self):
        torch.zeros(5, 6).copy_(torch.zeros(6))
        self.assertRaises(RuntimeError, lambda: torch.zeros(5, 6).copy_(torch.zeros(30)))

    # FIXME: Port to a more appropriate test suite
    # Fails with inductor (and aot_eager) because functionalization replaces copy_ with copy,
    # which doesn't properly error on bad inputs.
    def test_copy_many_to_one(self):
        # Test that an in-place copy raises a RuntimeError when it would have to
        # write many source elements into a single (overlapping) storage location
        self.assertRaises(RuntimeError, lambda: torch.zeros(1, 6).expand(5, 6).copy_(torch.zeros(5, 6)))

    def test_copy_float16(self):
        # Check that fbgemm code no longer reads memory out of bounds, see
        # copy_impl and fbgemm::Float16ToFloat_ref.
        # https://github.com/pytorch/pytorch/issues/88543

        # Types to test different code paths in copy_impl.
        dtypes = (
            # out_dtype, src_dtype
            (torch.float32, torch.float16),  # fbgemm
            (torch.float16, torch.float32),  # fbgemm
            (torch.float32, torch.float32),  # TensorIterator
        )

        cases = (
            # out_shape, src_shape, is_ok
            # These cases used to crash with fbgemm, make sure these also raise
            # exceptions with TensorIterator.
            ((1, 2, 3), (0, 2, 3), False),  # same strides, not allowed by TI
            ((1, 5, 6), (4, 5, 6), False),  # same strides, not allowed by TI
            (1, (0, 2, 3), False),  # different strides
            ((4, 5, 6), (0, 2, 3), False),  # different strides
            ((4, 5, 6), (1, 2, 3), False),  # different strides
            ((4, 5, 6), (6, 5, 4), False),  # same numel

            # These cases should pass with fbgemm and TensorIterator.
            ((4, 5, 6), (1, 5, 6), True),  # same strides
            ((4, 5, 6), (4, 5, 6), True),  # same strides
            ((0, 2, 3), 1, True),  # different strides, allowed by TI
            ((4, 5, 6), (4, 5, 1), True),  # different strides, allowed by TI
        )

        for (out_shape, src_shape, is_ok), (out_dtype, src_dtype) in itertools.product(cases, dtypes):
            out = torch.zeros(out_shape, dtype=out_dtype, device=torch.device('cpu'))
            src = torch.ones(src_shape, dtype=src_dtype, device=torch.device('cpu'))
            if is_ok:
                if torch.cuda.is_available():
                    out_cuda = out.cuda()
                    src_cuda = src.cuda()
                res = out.copy_(src)
                if torch.cuda.is_available():
                    res_cuda = out_cuda.copy_(src_cuda)
                    self.assertEqual(res, res_cuda)
            else:
                self.assertRaises(RuntimeError, lambda: out.copy_(src))

    # FIXME: Port to a more appropriate test suite
    def _test_to_with_layout(self, layout):
        def test_copy_behavior(t, non_blocking=False):
            self.assertIs(t, t.to(t, non_blocking=non_blocking))
            self.assertIs(t, t.to(t.dtype, non_blocking=non_blocking))
            self.assertIs(t, t.to(torch.empty_like(t), non_blocking=non_blocking))
            self.assertIsNot(t, t.to(t, non_blocking=non_blocking, copy=True))
            self.assertIsNot(t, t.to(t.dtype, non_blocking=non_blocking, copy=True))
            self.assertIsNot(t, t.to(torch.empty_like(t), non_blocking=non_blocking, copy=True))

            devices = [t.device]
            if t.device.type == 'cuda':
                if t.device.index == -1:
                    devices.append(f'cuda:{torch.cuda.current_device()}')
                elif t.device.index == torch.cuda.current_device():
                    devices.append('cuda')
            for device in devices:
                self.assertIs(t, t.to(device, non_blocking=non_blocking))
                self.assertIs(t, t.to(device, t.dtype, non_blocking=non_blocking))
                self.assertIsNot(t, t.to(device, non_blocking=non_blocking, copy=True))
                self.assertIsNot(t, t.to(device, t.dtype, non_blocking=non_blocking, copy=True))

        a = torch.tensor(5)
        if layout == torch.sparse_csr:
            a = torch.tensor([[0, 1, 2], [2, 0, 3]]).to_sparse_csr()
        test_copy_behavior(a)
        self.assertEqual(a.device, a.to('cpu').device)
        self.assertEqual(a.device, a.to('cpu', dtype=torch.float32).device)
        self.assertIs(torch.float32, a.to('cpu', dtype=torch.float32).dtype)
        self.assertEqual(a.device, a.to(torch.float32).device)
        self.assertIs(torch.float32, a.to(dtype=torch.float32).dtype)

        def test_data_ptr(getter):
            self.assertEqual(getter(a), getter(a.to('cpu')))
            self.assertEqual(getter(a), getter(a.to(dtype=a.dtype, device=a.device, copy=False)))
            self.assertEqual(getter(a), getter(a.to('cpu', copy=False)))
            self.assertNotEqual(getter(a), getter(a.to('cpu', copy=True)))
        if layout == torch.sparse_csr:
            # TODO: compressed sparse tensors currently don't support data_ptr.
            # Exercising failure will allow us to widen coverage of this test once it does.
            with self.assertRaisesRegex(RuntimeError, "Cannot access data pointer of Tensor that doesn't have storage"):
                a.data_ptr()
            # While compressed sparse tensors don't have a concept of data_ptr,
            # the underlying tensors do. The implementation of to() appropriately
            # forwards the call to the components, which is what we're testing here.
            test_data_ptr(lambda a: a.values().data_ptr())
            test_data_ptr(lambda a: a.crow_indices().data_ptr())
            test_data_ptr(lambda a: a.col_indices().data_ptr())
        else:
            test_data_ptr(lambda a: a.data_ptr())

        if torch.cuda.is_available():
            for non_blocking in [True, False]:
                for cuda in ['cuda', 'cuda:0' if torch.cuda.device_count() == 1 else 'cuda:1']:
                    b = torch.tensor(5., device=cuda)
                    test_copy_behavior(b, non_blocking)
                    self.assertEqual(b.device, b.to(cuda, non_blocking=non_blocking).device)
                    self.assertEqual(a.device, b.to('cpu', non_blocking=non_blocking).device)
                    self.assertEqual(b.device, a.to(cuda, non_blocking=non_blocking).device)
                    self.assertIs(torch.int32, b.to('cpu', dtype=torch.int32, non_blocking=non_blocking).dtype)
                    self.assertEqual(a.device, b.to('cpu', dtype=torch.int32, non_blocking=non_blocking).device)
                    self.assertIs(torch.int32, b.to(dtype=torch.int32).dtype)
                    self.assertEqual(b.device, b.to(dtype=torch.int32).device)

    def test_to(self):
        self._test_to_with_layout(torch.strided)
        is_cuda10_2_or_higher = (
            (torch.version.cuda is not None)
            and ([int(x) for x in torch.version.cuda.split(".")] >= [10, 2]))
        if is_cuda10_2_or_higher:  # in cuda10_1 sparse_csr is beta
            self._test_to_with_layout(torch.sparse_csr)

    # FIXME: describe this test
    def test_as_subclass(self):
        class SubTensor(torch.Tensor):
            member_var = object()

        t0 = torch.tensor(0)
        t1 = torch.tensor([1, 2])
        t2 = torch.tensor([[3, 4], [5, 6]])

        s0 = t0.as_subclass(SubTensor)
        s1 = t1.as_subclass(SubTensor)
        s2 = t2.as_subclass(SubTensor)

        # Check that the correct type is returned.
        self.assertTrue(type(s0) is SubTensor)
        self.assertTrue(type(s1) is SubTensor)
        self.assertTrue(type(s2) is SubTensor)

        # Check that the data is equal.
9348 self.assertEqual(t0, s0) 9349 self.assertEqual(t1, s1) 9350 self.assertEqual(t2, s2) 9351 9352 t0[()] = 1 9353 t1[1] = 3 9354 t2[1, 1] = 7 9355 9356 # Check that the data is equal even after modification. 9357 self.assertEqual(t0, s0) 9358 self.assertEqual(t1, s1) 9359 self.assertEqual(t2, s2) 9360 9361 # Check that member variables are passed through. 9362 self.assertTrue(s0.member_var is SubTensor.member_var) 9363 self.assertTrue(s1.member_var is SubTensor.member_var) 9364 self.assertTrue(s2.member_var is SubTensor.member_var) 9365 9366 # Test that autograd is propagated. 9367 t = torch.tensor(5, dtype=torch.float32, requires_grad=True) 9368 9369 # Run a calculation on the tensor. 9370 exp_t = torch.exp(t) 9371 9372 # Cast exp_t to a subclass. 9373 exp_s = exp_t.as_subclass(SubTensor) 9374 9375 # Make sure that t.grad was initially None 9376 self.assertTrue(t.grad is None) 9377 9378 # Run the autograd calculation. 9379 exp_s.backward() 9380 9381 # Make sure autograd was propagated to the original tensor 9382 # declared with requires_grad. 9383 self.assertTrue(t.grad is not None) 9384 9385 # Make sure invalid subclasses raise nice errors 9386 class BadSubTensor: 9387 member_var = object() 9388 9389 err_msg = "Creating a Tensor subclass from a class that does not inherit from Tensor" 9390 with self.assertRaisesRegex(RuntimeError, err_msg): 9391 s0 = t0.as_subclass(BadSubTensor) 9392 9393 # FIXME: Port to a test suite that better fits slicing 9394 def test_slice(self): 9395 empty = torch.empty(0, 4) 9396 x = torch.arange(0., 16).view(4, 4) 9397 self.assertEqual(x[:], x) 9398 self.assertEqual(x[:4], x) 9399 # start and stop are clamped to the size of dim 9400 self.assertEqual(x[:5], x) 9401 # if start >= stop then the result is empty 9402 self.assertEqual(x[2:1], empty) 9403 self.assertEqual(x[2:2], empty) 9404 # out of bounds is also empty 9405 self.assertEqual(x[10:12], empty) 9406 # additional correctness checks 9407 self.assertEqual(x[:1].tolist(), [[0, 1, 2, 3]]) 9408 self.assertEqual(x[:-3].tolist(), [[0, 1, 2, 3]]) 9409 self.assertEqual(x[:, -2:3].tolist(), [[2], [6], [10], [14]]) 9410 self.assertEqual(x[0:-1:2].tolist(), [[0, 1, 2, 3], [8, 9, 10, 11]]) 9411 9412 def test_split_with_sizes_copy_out(self): 9413 device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") 9414 shape = (30, 40, 50) 9415 x = torch.rand(*shape, device=device) 9416 cases = [ 9417 (0, [3, 7, 8, 12]), 9418 (1, [3, 7, 10, 20]), 9419 (-2, [3, 7, 10, 20]), 9420 (2, [3, 7, 10, 12, 18]), 9421 (-1, [3, 7, 10, 12, 18]), 9422 (2, [3, 7, 10, 0, 30]), 9423 ] 9424 for dim, split_sizes in cases: 9425 views = x.split_with_sizes(split_sizes, dim=dim) 9426 expects = [v.clone() for v in views] 9427 out = [torch.zeros_like(v) for v in views] 9428 for expect, t in zip(expects, out): 9429 if expect.numel() != 0: 9430 self.assertFalse(expect.eq(t).all().item()) 9431 9432 torch.split_with_sizes_copy(x, split_sizes, dim=dim, out=out) 9433 for expect, t in zip(expects, out): 9434 self.assertTrue(expect.eq(t).all().item()) 9435 9436 if not torch.cuda.is_available(): 9437 continue 9438 9439 # Test with cuda graph 9440 out = [torch.zeros_like(v) for v in views] 9441 for expect, t in zip(expects, out): 9442 if expect.numel() != 0: 9443 self.assertFalse(expect.eq(t).all().item()) 9444 9445 g = torch.cuda.CUDAGraph() 9446 with torch.cuda.graph(g): 9447 torch.split_with_sizes_copy(x, split_sizes, dim=dim, out=out) 9448 9449 g.replay() 9450 for expect, t in zip(expects, out): 9451 
                self.assertTrue(expect.eq(t).all().item())

    def test_type(self):
        x = torch.randn(3, 3).double()
        self.assertEqual(x.type('torch.FloatTensor').dtype, torch.float32)
        self.assertEqual(x.type(torch.FloatTensor).dtype, torch.float32)
        self.assertEqual(x.int().type(torch.Tensor).dtype, torch.get_default_dtype())
        self.assertEqual(x.type(torch.int32).dtype, torch.int32)

    # FIXME: port to a quantization test suite
    def test_qengine(self):
        qengines = torch.backends.quantized.supported_engines
        original_qe = torch.backends.quantized.engine
        for qe in qengines:
            torch.backends.quantized.engine = qe
            assert torch.backends.quantized.engine == qe, 'qengine not set successfully'
        torch.backends.quantized.engine = original_qe

    def test_terminate_handler_on_crash(self):
        cmd = [sys.executable, '-c', "import os; os.environ[\"TORCH_CUSTOM_TERMINATE\"] ='1'; \
               import torch; import torch._C; torch._C._abort()"]
        with self.assertRaises(subprocess.CalledProcessError) as cm:
            subprocess.check_output(cmd, shell=False)
        e = cm.exception
        output = e.stdout.decode("utf-8")
        self.assertNotEqual(e.returncode, 0)
        self.assertNotEqual(output, None)
        self.assertIn('Unhandled exception caught in c10/util/AbortHandler.h', output)

    # FIXME: port to a distributed test suite -- also... how could this be OOMing on Windows CUDA?
    @slowTest
    @unittest.skipIf(NO_MULTIPROCESSING_SPAWN, "Disabled for environments that \
                     don't support multiprocessing with spawn start method")
    @unittest.skipIf(IS_WINDOWS, 'FIXME: CUDA OOM error on Windows')
    def test_multinomial_invalid_probs(self):
        # (the stray `self` parameter is dropped here: this is a nested helper,
        # not a method, and it is called with exactly two arguments below;
        # `self` is available via the closure)
        def _spawn_method(method, arg):
            try:
                mp.set_start_method('spawn')
            except RuntimeError:
                pass
            with mp.Pool(1) as pool:
                out = pool.map(method, [arg])
                self.assertTrue(out[0])

        def _test_multinomial_invalid_probs(probs):
            try:
                # n_sample = 1 is a special case, test n_sample=2 which is more general
                torch.multinomial(probs.to('cpu'), 2)
                return False  # Should not be reached
            except RuntimeError as e:
                return 'probability tensor contains either `inf`, `nan` or element < 0' in str(e)

        _spawn_method(_test_multinomial_invalid_probs, torch.tensor([1., -1., 1.]))
        _spawn_method(_test_multinomial_invalid_probs, torch.tensor([1., inf, 1.]))
        _spawn_method(_test_multinomial_invalid_probs, torch.tensor([1., -inf, 1.]))
        _spawn_method(_test_multinomial_invalid_probs, torch.tensor([1., 1., nan]))

    # FIXME: port to a more appropriate test suite
    def test_to_with_tensor(self):
        a = torch.tensor(5)
        self.assertEqual(a.device, a.to(a).device)

        if torch.cuda.is_available():
            for non_blocking in [True, False]:
                for cuda in ['cuda', 'cuda:0' if torch.cuda.device_count() == 1 else 'cuda:1']:
                    b = torch.tensor(5., device=cuda)
                    self.assertEqual(b.device, b.to(b, non_blocking=non_blocking).device)
                    self.assertEqual(a.device, b.to(a, non_blocking=non_blocking).device)
                    self.assertEqual(b.device, a.to(b, non_blocking=non_blocking).device)

    def test_device(self):
        cpu = torch.device('cpu')
        self.assertEqual('cpu', str(cpu))
        self.assertEqual('cpu', cpu.type)
        self.assertEqual(None, cpu.index)

        cpu0 = torch.device('cpu:0')
        self.assertEqual('cpu:0', str(cpu0))
        self.assertEqual('cpu', cpu0.type)
        self.assertEqual(0, cpu0.index)

        cpu0 =
torch.device('cpu', 0) 9533 self.assertEqual('cpu:0', str(cpu0)) 9534 self.assertEqual('cpu', cpu0.type) 9535 self.assertEqual(0, cpu0.index) 9536 9537 cuda = torch.device('cuda') 9538 self.assertEqual('cuda', str(cuda)) 9539 self.assertEqual('cuda', cuda.type) 9540 self.assertEqual(None, cuda.index) 9541 9542 cuda1 = torch.device('cuda:1') 9543 self.assertEqual('cuda:1', str(cuda1)) 9544 self.assertEqual('cuda', cuda1.type) 9545 self.assertEqual(1, cuda1.index) 9546 9547 cuda1 = torch.device('cuda', 1) 9548 self.assertEqual('cuda:1', str(cuda1)) 9549 self.assertEqual('cuda', cuda1.type) 9550 self.assertEqual(1, cuda1.index) 9551 9552 cuda90 = torch.device('cuda', 90) 9553 self.assertEqual('cuda:90', str(cuda90)) 9554 self.assertEqual('cuda', cuda90.type) 9555 self.assertEqual(90, cuda90.index) 9556 9557 self.assertRaises(RuntimeError, lambda: torch.device('cpu:-1')) 9558 self.assertRaises(RuntimeError, lambda: torch.device('cuda:-1')) 9559 self.assertRaises(RuntimeError, lambda: torch.device('cuda:2 ')) 9560 self.assertRaises(RuntimeError, lambda: torch.device('cuda: 2')) 9561 self.assertRaises(RuntimeError, lambda: torch.device('cuda:2 2')) 9562 self.assertRaises(RuntimeError, lambda: torch.device('cuda:2.')) 9563 self.assertRaises(RuntimeError, lambda: torch.device('cuda:2?')) 9564 self.assertRaises(RuntimeError, lambda: torch.device('cuda:?2')) 9565 self.assertRaises(RuntimeError, lambda: torch.device('cuda:')) 9566 self.assertRaises(RuntimeError, lambda: torch.device('cuda:2.232')) 9567 self.assertRaises(RuntimeError, lambda: torch.device('cuda:2 cuda:3')) 9568 self.assertRaises(RuntimeError, lambda: torch.device('cuda:2+cuda:3')) 9569 self.assertRaises(RuntimeError, lambda: torch.device('cuda:2cuda:3')) 9570 self.assertRaises(RuntimeError, lambda: torch.device(-1)) 9571 9572 self.assertRaises(RuntimeError, lambda: torch.device('other')) 9573 self.assertRaises(RuntimeError, lambda: torch.device('other:0')) 9574 9575 device_set = {'cpu', 'cpu:0', 'cuda', 'cuda:0', 'cuda:1', 'cuda:10', 'cuda:100'} 9576 device_hash_set = set() 9577 device_hash_set.update(hash(torch.device(device)) for device in device_set) 9578 self.assertEqual(len(device_set), len(device_hash_set)) 9579 9580 def get_expected_device_repr(device): 9581 if device.index is not None: 9582 return f"device(type='{device.type}', index={device.index})" 9583 9584 return f"device(type='{device.type}')" 9585 9586 for device in device_set: 9587 dev = torch.device(device) 9588 self.assertEqual(repr(dev), get_expected_device_repr(dev)) 9589 9590 # Tests that the use_deterministic_flag can be set as expected 9591 @wrapDeterministicFlagAPITest 9592 def test_deterministic_flag(self): 9593 for deterministic, warn_only in product([True, False], [True, False]): 9594 torch.use_deterministic_algorithms(deterministic, warn_only=warn_only) 9595 self.assertEqual(deterministic, torch.are_deterministic_algorithms_enabled()) 9596 self.assertEqual(warn_only, torch.is_deterministic_algorithms_warn_only_enabled()) 9597 9598 if deterministic: 9599 if warn_only: 9600 debug_mode = 1 9601 else: 9602 debug_mode = 2 9603 else: 9604 debug_mode = 0 9605 9606 self.assertEqual(debug_mode, torch.get_deterministic_debug_mode()) 9607 9608 for debug_mode in [0, 1, 2]: 9609 torch.set_deterministic_debug_mode(debug_mode) 9610 self.assertEqual(debug_mode, torch.get_deterministic_debug_mode()) 9611 deterministic = debug_mode in [1, 2] 9612 warn_only = debug_mode == 1 9613 9614 self.assertEqual(deterministic, torch.are_deterministic_algorithms_enabled()) 9615 
self.assertEqual(warn_only, torch.is_deterministic_algorithms_warn_only_enabled()) 9616 9617 for debug_mode, debug_mode_str in [(0, 'default'), (1, 'warn'), (2, 'error')]: 9618 torch.set_deterministic_debug_mode(debug_mode_str) 9619 self.assertEqual(debug_mode, torch.get_deterministic_debug_mode()) 9620 9621 with self.assertRaisesRegex( 9622 TypeError, 9623 r"_set_deterministic_algorithms\(\): argument 'mode' \(position 1\) must be bool, not int"): 9624 torch.use_deterministic_algorithms(1) 9625 9626 with self.assertRaisesRegex( 9627 TypeError, 9628 r"_set_deterministic_algorithms\(\): argument 'warn_only' must be bool, not int"): 9629 torch.use_deterministic_algorithms(False, warn_only=1) 9630 9631 # Tests that torch.utils.deterministic.fill_uninitialized_memory can be set as expected 9632 def test_deterministic_fill_uninitialized_memory(self): 9633 with DeterministicGuard(True, fill_uninitialized_memory=False): 9634 self.assertFalse(torch.utils.deterministic.fill_uninitialized_memory) 9635 self.assertFalse(torch._C._get_deterministic_fill_uninitialized_memory()) 9636 9637 with DeterministicGuard(True, fill_uninitialized_memory=True): 9638 self.assertTrue(torch.utils.deterministic.fill_uninitialized_memory) 9639 self.assertTrue(torch._C._get_deterministic_fill_uninitialized_memory()) 9640 9641 self.assertFalse(torch.utils.deterministic.fill_uninitialized_memory) 9642 self.assertFalse(torch._C._get_deterministic_fill_uninitialized_memory()) 9643 9644 torch.utils.deterministic.fill_uninitialized_memory = False 9645 self.assertFalse(torch.utils.deterministic.fill_uninitialized_memory) 9646 self.assertFalse(torch._C._get_deterministic_fill_uninitialized_memory()) 9647 9648 torch.utils.deterministic.fill_uninitialized_memory = True 9649 self.assertTrue(torch.utils.deterministic.fill_uninitialized_memory) 9650 self.assertTrue(torch._C._get_deterministic_fill_uninitialized_memory()) 9651 9652 torch._C._set_deterministic_fill_uninitialized_memory(False) 9653 self.assertFalse(torch.utils.deterministic.fill_uninitialized_memory) 9654 self.assertFalse(torch._C._get_deterministic_fill_uninitialized_memory()) 9655 9656 torch._C._set_deterministic_fill_uninitialized_memory(True) 9657 self.assertTrue(torch.utils.deterministic.fill_uninitialized_memory) 9658 self.assertTrue(torch._C._get_deterministic_fill_uninitialized_memory()) 9659 9660 with self.assertRaisesRegex(RuntimeError, r"expected a bool, but got int"): 9661 torch.utils.deterministic.fill_uninitialized_memory = 1 9662 9663 def test_type_conversion_via_dtype_name(self): 9664 x = torch.tensor([1]) 9665 self.assertEqual(x.byte().dtype, torch.uint8) 9666 self.assertEqual(x.bool().dtype, torch.bool) 9667 self.assertEqual(x.char().dtype, torch.int8) 9668 self.assertEqual(x.double().dtype, torch.float64) 9669 self.assertEqual(x.float().dtype, torch.float32) 9670 self.assertEqual(x.half().dtype, torch.float16) 9671 self.assertEqual(x.int().dtype, torch.int32) 9672 self.assertEqual(x.bfloat16().dtype, torch.bfloat16) 9673 cfloat = x.cfloat() 9674 self.assertEqual(cfloat.dtype, torch.complex64) 9675 self.assertEqual(cfloat.real, x.float()) 9676 self.assertEqual(cfloat.imag, torch.zeros_like(cfloat.imag)) 9677 cdouble = x.cdouble() 9678 self.assertEqual(cdouble.dtype, torch.complex128) 9679 self.assertEqual(cdouble.real, x.double()) 9680 self.assertEqual(cdouble.imag, torch.zeros_like(cdouble.imag)) 9681 chalf = x.chalf() 9682 self.assertEqual(chalf.dtype, torch.complex32) 9683 self.assertEqual(chalf.real, x.half()) 9684 self.assertEqual(chalf.imag, 
torch.zeros_like(chalf.imag)) 9685 9686 def test_type_alias(self): 9687 type_alias_map = {torch.float64: torch.double, 9688 torch.float32: torch.float, 9689 torch.int32: torch.int, 9690 torch.int64: torch.long, 9691 torch.int16: torch.short, 9692 torch.float16: torch.half, 9693 torch.complex32: torch.chalf, 9694 torch.complex64: torch.cfloat} 9695 for dtype, alias in type_alias_map.items(): 9696 self.assertIs(alias, dtype) 9697 9698 def test_doc_template(self) -> None: 9699 """ 9700 Test that all public API doc strings use the same standard template for 9701 all common arguments such as tensor or dim 9702 """ 9703 from torch._torch_docs import __file__ as doc_file 9704 from torch._torch_docs import multi_dim_common, single_dim_common, factory_common_args, factory_like_common_args 9705 9706 with open(doc_file, encoding="utf-8") as f: 9707 doc_strs = f.read() 9708 9709 matches = re.findall( 9710 r'add_docstr\(([^,]+?),[^"\']*?(?:"""|\'\'\')(.*?)(?:"""|\'\'\')(?:\.|,?[^,\)]*?\))', 9711 doc_strs, 9712 re.MULTILINE | re.DOTALL, 9713 ) 9714 self.assertTrue(matches) 9715 9716 for m in matches: 9717 func = m[0].strip() 9718 desc = m[1].strip() 9719 9720 for common_args in [multi_dim_common, single_dim_common, factory_common_args, factory_like_common_args]: 9721 for k, v in common_args.items(): 9722 self.assertNotIn(v, desc, f'The argument description "{v}" in {func} can be ' 9723 f'replaced by {{{k}}}') 9724 9725 def test_doc(self): 9726 checked_types = (types.MethodType, types.FunctionType, 9727 types.BuiltinFunctionType, types.BuiltinMethodType) 9728 9729 def _test_namespace(ns, *skips): 9730 if isinstance(ns, object): 9731 ns_name = ns.__class__.__name__ 9732 else: 9733 ns_name = ns.__name__ 9734 skip_regexes = [] 9735 for r in skips: 9736 if isinstance(r, str): 9737 skip_regexes.append(re.compile(f'^{re.escape(r)}$')) 9738 else: 9739 skip_regexes.append(r) 9740 9741 for name in dir(ns): 9742 if name.startswith('_'): 9743 continue 9744 if name in ['real', 'imag']: 9745 y = torch.randn(1, dtype=torch.cfloat) 9746 var = getattr(y, name) 9747 elif name in ["H", "mT", "mH"]: 9748 y = torch.randn(1, 1) 9749 var = getattr(y, name) 9750 else: 9751 var = getattr(ns, name) 9752 if not isinstance(var, checked_types): 9753 continue 9754 doc = var.__doc__ 9755 has_doc = doc is not None and len(doc.strip()) > 0 9756 full_name = ns_name + '.' + name 9757 if any(r.match(name) for r in skip_regexes): 9758 self.assertFalse(has_doc, 9759 f'New docs have been added for {full_name}, please remove ' 9760 'it from the skipped list in TestTorch.test_doc') 9761 else: 9762 self.assertTrue(has_doc, f'{full_name} is missing documentation') 9763 9764 # FIXME: All of the following should be marked as expected failures 9765 # so that it is easier to tell when missing has been added. 9766 # FIXME: fix all the skipped ones below! 
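        # (Entries in the skip list below are asserted to be UNdocumented:
        #  once a doc string is added for one of these names, _test_namespace
        #  flips the assertion and the entry must be removed from the list.)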
        _test_namespace(torch.randn(1),
                        'as_strided_',
                        re.compile('^clamp_(min|max)_?$'),
                        'is_distributed',
                        'is_nonzero',
                        'is_same_size',
                        'log_softmax',
                        'map2_',
                        'new',
                        'reinforce',
                        'relu',
                        'relu_',
                        'prelu',
                        'resize',
                        'resize_as',
                        'softmax',
                        'split_with_sizes',
                        'unsafe_split_with_sizes',
                        '_autocast_to_fp16',
                        '_autocast_to_fp32',
                        )

        _test_namespace(torch.nn)
        _test_namespace(torch.nn.functional, 'assert_int_or_pair')
        # TODO: add torch.* tests when we have proper namespacing on ATen functions
        # _test_namespace(torch)

    # FIXME: deprecate torch.Tensor constructor
    def test_tensor_ctor_scalar(self):
        x = torch.Tensor(torch.tensor(1.0))
        self.assertEqual(x, torch.tensor(1.0))

    def test_deepcopy_gradient(self):
        from copy import deepcopy
        a = torch.zeros(10)
        a.grad = torch.ones(10)
        self.assertEqual(a.grad, deepcopy(a).grad)
        s = torch.zeros(10).to_sparse()
        s.grad = torch.ones(10).to_sparse()
        self.assertEqual(s.grad, deepcopy(s).grad)

        # ensure sharing is not broken
        c = deepcopy([a, a.grad])
        self.assertTrue(c[0].grad is c[1])

    def test_tensor_base_init(self):
        # Direct construction not OK
        self.assertRaises(RuntimeError, lambda: torch._C.TensorBase())

        # Subclassing it directly is not OK either
        with self.assertRaisesRegex(RuntimeError, "Cannot subclass"):
            class Tfail(torch._C.TensorBase):
                pass

        # Doing so with Tensor is OK though
        class T(torch.Tensor):
            pass

        T()

    def test_storage_base_init(self):
        # Direct construction not OK
        self.assertRaises(RuntimeError, lambda: torch._C.StorageBase())

        # But construction of subclass is OK
        class T(torch._C.StorageBase):
            pass

        T()

    def test_tensor_base_new(self):

        # OK to call super().__new__, see
        # https://github.com/pytorch/pytorch/issues/57421
        class TestTensor(torch.Tensor):
            @staticmethod
            def __new__(cls, x, *args, **kwargs):
                return super().__new__(cls, x, *args, **kwargs)

        x = torch.ones(5)
        test_tensor = TestTensor(x)

    def test_storage_base_new(self):

        # OK to call super().__new__, see
        # https://github.com/pytorch/pytorch/issues/57421
        class TestStorage(torch._C.StorageBase):
            @staticmethod
            def __new__(cls, x, *args, **kwargs):
                return super().__new__(cls, x, *args, **kwargs)

        x = torch.UntypedStorage(5)
        test_storage = TestStorage(x)

    def test_pyobj_preserved(self):
        x = torch.empty(2)
        x.foo = 2  # put something on __dict__
        y = torch.empty(2)
        y.grad = x
        del x  # x is dead in Python
        self.assertEqual(y.grad.foo, 2)
        z = y.grad  # it's live
        del z  # it's dead again
        self.assertEqual(y.grad.foo, 2)

    def test_subclass_preserved(self):
        class MyTensor(torch.Tensor):
            pass

        x = MyTensor(torch.empty(2))
        y = torch.empty(2)
        y.grad = x
        del x  # x is dead in Python
        self.assertEqual(type(y.grad), MyTensor)
        z = y.grad  # it's live
        del z  # it's dead again
        self.assertEqual(type(y.grad), MyTensor)

    @skipIfTorchDynamo("Tracker hook does not work in TorchDynamo")
    def test_storage_dealloc(self):
        m, t = Tracker.make()
        s0 = torch.UntypedStorage(10)
        s1 = s0
        s0._tracker = t
        del t
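        # m[0] flips to True only once the tracker attached to s0 is finalized,
        # which pins down exactly when the storage object dies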
self.assertFalse(m[0]) 9894 del s0 9895 self.assertFalse(m[0]) 9896 del s1 9897 self.assertTrue(m[0]) 9898 9899 @skipIfTorchDynamo("Tracker hook does not work in TorchDynamo") 9900 def test_storage_from_tensor_dealloc(self): 9901 m, t = Tracker.make() 9902 a = torch.randn(10) 9903 s0 = a.untyped_storage() 9904 s0._tracker = t 9905 del t 9906 9907 s1 = a.untyped_storage() 9908 self.assertTrue(s0 is s1) 9909 self.assertTrue(hasattr(s1, '_tracker')) 9910 9911 del a 9912 9913 self.assertFalse(m[0]) 9914 del s0 9915 self.assertFalse(m[0]) 9916 del s1 9917 self.assertTrue(m[0]) 9918 9919 @skipIfTorchDynamo("Tracker hook does not work in TorchDynamo") 9920 def test_storage_from_tensor_dealloc_zombie(self): 9921 m, t = Tracker.make() 9922 a = torch.randn(10) 9923 s0 = a.untyped_storage() 9924 s0._tracker = t 9925 del t 9926 9927 s1 = a.untyped_storage() 9928 self.assertTrue(s0 is s1) 9929 self.assertTrue(hasattr(s1, '_tracker')) 9930 9931 self.assertFalse(m[0]) 9932 del s0 9933 self.assertFalse(m[0]) 9934 del s1 9935 self.assertFalse(m[0]) 9936 del a 9937 self.assertTrue(m[0]) 9938 9939 @skipIfTorchDynamo("Tracker hook does not work in TorchDynamo") 9940 def test_storage_from_tensor_dealloc_resurrected(self): 9941 m, t = Tracker.make() 9942 a = torch.randn(10) 9943 s0 = a.untyped_storage() 9944 s0._tracker = t 9945 del t 9946 9947 s1 = a.untyped_storage() 9948 self.assertTrue(s0 is s1) 9949 self.assertTrue(hasattr(s1, '_tracker')) 9950 9951 self.assertFalse(m[0]) 9952 del s0 9953 self.assertFalse(m[0]) 9954 del s1 9955 self.assertFalse(m[0]) 9956 9957 s0 = a.untyped_storage() 9958 self.assertTrue(isinstance(s0, torch.UntypedStorage)) 9959 9960 del a 9961 self.assertFalse(m[0]) 9962 del s0 9963 self.assertTrue(m[0]) 9964 9965 @skipIfTorchDynamo("Tracker hook does not work in TorchDynamo") 9966 def test_storage_dealloc_resurrected(self): 9967 m, t = Tracker.make() 9968 s = torch.UntypedStorage(10) 9969 s._tracker = t 9970 del t 9971 9972 a = torch.tensor(s) 9973 self.assertFalse(m[0]) 9974 del s 9975 9976 self.assertFalse(m[0]) 9977 9978 s = a.untyped_storage() 9979 self.assertTrue(isinstance(s, torch.UntypedStorage)) 9980 9981 del a 9982 self.assertFalse(m[0]) 9983 del s 9984 self.assertTrue(m[0]) 9985 9986 @skipIfTorchDynamo("Tracker hook does not work in TorchDynamo") 9987 def test_storage_dealloc_subclass_zombie(self): 9988 class MyStorage(torch.UntypedStorage): 9989 finalized_count = 0 9990 9991 def __del__(self): 9992 MyStorage.finalized_count += 1 9993 9994 m, t = Tracker.make() 9995 s = MyStorage(10) 9996 s._tracker = t 9997 del t 9998 9999 a = torch.tensor(s) 10000 self.assertFalse(m[0]) 10001 del s 10002 10003 self.assertEqual(MyStorage.finalized_count, 0) 10004 self.assertFalse(m[0]) 10005 10006 del a 10007 self.assertEqual(MyStorage.finalized_count, 1) 10008 self.assertTrue(m[0]) 10009 10010 @skipIfTorchDynamo("Tracker hook does not work in TorchDynamo") 10011 def test_storage_dealloc_subclass_resurrected(self): 10012 class MyStorage(torch.UntypedStorage): 10013 finalized_count = 0 10014 10015 def __del__(self): 10016 MyStorage.finalized_count += 1 10017 10018 m, t = Tracker.make() 10019 s = MyStorage(10) 10020 s._tracker = t 10021 del t 10022 10023 a = torch.tensor(s) 10024 self.assertFalse(m[0]) 10025 del s 10026 10027 self.assertEqual(MyStorage.finalized_count, 0) 10028 self.assertFalse(m[0]) 10029 10030 s = a.untyped_storage() 10031 del a 10032 self.assertFalse(m[0]) 10033 self.assertEqual(MyStorage.finalized_count, 0) 10034 self.assertTrue(isinstance(s, MyStorage)) 10035 del s 10036 
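        # dropping the resurrected reference finally runs MyStorage.__del__
        # and fires the tracker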
self.assertEqual(MyStorage.finalized_count, 1) 10037 self.assertTrue(m[0]) 10038 10039 def test_tensor_slot_dealloc(self): 10040 10041 class SlotTensor1(torch.Tensor): 10042 __slots__ = ['slot1'] 10043 10044 class SlotTensor2(SlotTensor1): 10045 __slots__ = ['slot2'] 10046 10047 m1, t1 = Tracker.make() 10048 m2, t2 = Tracker.make() 10049 slot_tensor = SlotTensor2(torch.empty(2)) 10050 slot_tensor.slot1 = t1 10051 slot_tensor.slot2 = t2 10052 del t1 10053 del t2 10054 self.assertFalse(m1[0]) 10055 self.assertFalse(m2[0]) 10056 del slot_tensor 10057 self.assertTrue(m1[0]) 10058 self.assertTrue(m2[0]) 10059 10060 def test_storage_slot_dealloc(self): 10061 10062 class SlotStorage1(torch._C.StorageBase): 10063 __slots__ = ['slot1'] 10064 10065 class SlotStorage2(SlotStorage1): 10066 __slots__ = ['slot2'] 10067 10068 m1, t1 = Tracker.make() 10069 m2, t2 = Tracker.make() 10070 slot_storage = SlotStorage2(torch.UntypedStorage(2)) 10071 slot_storage.slot1 = t1 10072 slot_storage.slot2 = t2 10073 del t1 10074 del t2 10075 self.assertFalse(m1[0]) 10076 self.assertFalse(m2[0]) 10077 del slot_storage 10078 self.assertTrue(m1[0]) 10079 self.assertTrue(m2[0]) 10080 10081 @skipIfTorchDynamo("Not a suitable test for TorchDynamo") 10082 def test_tensor_dict_dealloc(self): 10083 m, t = Tracker.make() 10084 x = torch.empty(2) 10085 x.arf = t 10086 del t 10087 self.assertFalse(m[0]) 10088 del x 10089 self.assertTrue(m[0]) 10090 10091 @skipIfTorchDynamo("Not a suitable test for TorchDynamo") 10092 def test_storage_dict_dealloc(self): 10093 m, t = Tracker.make() 10094 x = torch.UntypedStorage(2) 10095 x.arf = t 10096 del t 10097 self.assertFalse(m[0]) 10098 del x 10099 self.assertTrue(m[0]) 10100 10101 def test_tensor_finalizer_dealloc(self): 10102 m = [False] 10103 10104 class FinalizerTensor(torch.Tensor): 10105 def __del__(self): 10106 m[0] = True 10107 10108 fin_tensor = FinalizerTensor(torch.empty(2)) 10109 self.assertFalse(m[0]) 10110 del fin_tensor 10111 self.assertTrue(m[0]) 10112 10113 def test_storage_finalizer_dealloc(self): 10114 m = [False] 10115 10116 class FinalizerStorage(torch._C.StorageBase): 10117 def __del__(self): 10118 m[0] = True 10119 10120 fin_storage = FinalizerStorage(torch.UntypedStorage(2)) 10121 self.assertFalse(m[0]) 10122 del fin_storage 10123 self.assertTrue(m[0]) 10124 10125 @skipIfTorchDynamo("https://github.com/pytorch/torchdynamo/issues/1993") 10126 def test_tensor_weakref_dealloc(self): 10127 x = torch.empty(2) 10128 m = [False] 10129 10130 def cb(r): 10131 m[0] = True 10132 10133 wref = weakref.ref(x, cb) 10134 del x 10135 self.assertTrue(m[0]) 10136 self.assertEqual(wref(), None) 10137 10138 @skipIfTorchDynamo("https://github.com/pytorch/torchdynamo/issues/1993") 10139 def test_storage_weakref_dealloc(self): 10140 10141 x = torch.UntypedStorage(2) 10142 m = [False] 10143 10144 def cb(r): 10145 m[0] = True 10146 10147 wref = weakref.ref(x, cb) 10148 del x 10149 self.assertTrue(m[0]) 10150 self.assertEqual(wref(), None) 10151 10152 @skipIfTorchDynamo("Not a suitable test for TorchDynamo") 10153 def test_tensor_cycle_via_dict(self): 10154 m1, t1 = Tracker.make() 10155 x = torch.empty(2) 10156 x._tracker = t1 10157 del t1 10158 10159 m2, t2 = Tracker.make() 10160 y = torch.empty(2) 10161 y._tracker = t2 10162 del t2 10163 10164 x._loop = y 10165 y._loop = x 10166 10167 # C++ reference should keep the cycle live! 
10168 # This exercise THPVariable_subtype_traverse 10169 # NB: Because z.grad is a reference done entirely in C++, cycles 10170 # involving it directly are NOT broken by Python GC; you've 10171 # set up a good old C++ reference cycle which we cannot safely 10172 # break (because C++ references are allowed to be accessed 10173 # multithreaded-ly) (TODO: except maybe if you can prove that 10174 # only Python has access to the C++ object, in which case you can 10175 # also prove that no multithreaded access occurs) 10176 z = torch.empty(2) 10177 z.grad = x 10178 10179 del x 10180 del y 10181 10182 gc.collect() 10183 self.assertFalse(m1[0]) 10184 self.assertFalse(m2[0]) 10185 10186 with disable_gc(): 10187 del z 10188 self.assertFalse(m1[0]) 10189 self.assertFalse(m2[0]) 10190 10191 gc.collect() 10192 self.assertTrue(m1[0]) 10193 self.assertTrue(m2[0]) 10194 10195 @skipIfTorchDynamo("Not a suitable test for TorchDynamo") 10196 def test_storage_cycle_via_dict(self): 10197 m1, t1 = Tracker.make() 10198 x = torch.UntypedStorage(2) 10199 x._tracker = t1 10200 del t1 10201 10202 m2, t2 = Tracker.make() 10203 y = torch.UntypedStorage(2) 10204 y._tracker = t2 10205 del t2 10206 10207 x._loop = y 10208 y._loop = x 10209 10210 # C++ reference should keep the cycle live! 10211 # This exercise THPVariable_subtype_traverse 10212 # NB: Because z.grad is a reference done entirely in C++, cycles 10213 # involving it directly are NOT broken by Python GC; you've 10214 # set up a good old C++ reference cycle which we cannot safely 10215 # break (because C++ references are allowed to be accessed 10216 # multithreaded-ly) (TODO: except maybe if you can prove that 10217 # only Python has access to the C++ object, in which case you can 10218 # also prove that no multithreaded access occurs) 10219 z = torch.UntypedStorage(2) 10220 z.grad = x 10221 10222 del x 10223 del y 10224 10225 gc.collect() 10226 self.assertFalse(m1[0]) 10227 self.assertFalse(m2[0]) 10228 10229 with disable_gc(): 10230 del z 10231 self.assertFalse(m1[0]) 10232 self.assertFalse(m2[0]) 10233 10234 gc.collect() 10235 self.assertTrue(m1[0]) 10236 self.assertTrue(m2[0]) 10237 10238 def test_tensor_cycle_via_slots(self): 10239 m1 = [False] 10240 m2 = [False] 10241 10242 class SlotTensor1(torch.Tensor): 10243 __slots__ = ['slot1'] 10244 10245 def __del__(self): 10246 m1[0] = True 10247 10248 class SlotTensor2(SlotTensor1): 10249 __slots__ = ['slot2'] 10250 10251 def __del__(self): 10252 m2[0] = True 10253 10254 x = SlotTensor1(torch.empty(2)) 10255 y = SlotTensor2(torch.empty(2)) 10256 10257 x.slot1 = y 10258 y.slot2 = x 10259 10260 del x 10261 with disable_gc(): 10262 del y 10263 self.assertFalse(m1[0]) 10264 self.assertFalse(m2[0]) 10265 10266 gc.collect() 10267 self.assertTrue(m1[0]) 10268 self.assertTrue(m2[0]) 10269 10270 def test_storage_cycle_via_slots(self): 10271 m1 = [False] 10272 m2 = [False] 10273 10274 class SlotStorage1(torch._C.StorageBase): 10275 __slots__ = ['slot1'] 10276 10277 def __del__(self): 10278 m1[0] = True 10279 10280 class SlotStorage2(SlotStorage1): 10281 __slots__ = ['slot2'] 10282 10283 def __del__(self): 10284 m2[0] = True 10285 10286 x = SlotStorage1(torch.UntypedStorage(2)) 10287 y = SlotStorage2(torch.UntypedStorage(2)) 10288 10289 x.slot1 = y 10290 y.slot2 = x 10291 10292 del x 10293 with disable_gc(): 10294 del y 10295 self.assertFalse(m1[0]) 10296 self.assertFalse(m2[0]) 10297 10298 gc.collect() 10299 self.assertTrue(m1[0]) 10300 self.assertTrue(m2[0]) 10301 10302 @skipIfTorchDynamo("Not a suitable test for 
TorchDynamo") 10303 def test_storage_preserve_nonhermetic_in_hermetic_context(self): 10304 from torch.library import Library, impl 10305 global _my_storage 10306 10307 my_lib = Library("my_lib", "DEF") # noqa: TOR901 10308 my_lib.define('my_func() -> None') 10309 10310 a = torch.tensor([1.]) 10311 _my_storage = a.untyped_storage() 10312 10313 m, t = Tracker.make() 10314 _my_storage._tracker = t 10315 del t 10316 10317 @impl(my_lib, 'my_func', '') 10318 def my_func(): 10319 global _my_storage 10320 del _my_storage 10321 10322 self.assertFalse(m[0]) 10323 torch.ops.my_lib.my_func() 10324 self.assertFalse(m[0]) 10325 10326 s = a.untyped_storage() 10327 del a 10328 del s 10329 self.assertTrue(m[0]) 10330 10331 # FIXME: move to test_autograd? 10332 @skipIfTorchDynamo("TorchDynamo does not work well with hooks") 10333 def test_backward_hooks_traverse(self): 10334 m1, t1 = Tracker.make() 10335 m2, t2 = Tracker.make() 10336 x = torch.empty(2, requires_grad=True) 10337 x._tracker = t1 10338 y = torch.empty(2, requires_grad=True) 10339 y._tracker = t2 10340 del t1 10341 del t2 10342 10343 # this hits a special setter, it's not just a __dict__ entry 10344 x._backward_hooks = y 10345 y._backward_hooks = x 10346 10347 del x 10348 with disable_gc(): 10349 del y 10350 self.assertFalse(m1[0]) 10351 self.assertFalse(m2[0]) 10352 10353 gc.collect() 10354 10355 self.assertTrue(m1[0]) 10356 self.assertTrue(m2[0]) 10357 10358 @skipIfTorchDynamo("https://github.com/pytorch/torchdynamo/issues/1993") 10359 def test_tensor_dead_weak_ref(self): 10360 x = torch.empty(2) 10361 w_x = weakref.ref(x) 10362 y = torch.empty(2) 10363 y.grad = x 10364 del x 10365 10366 x = w_x() 10367 # Ideally, x would keep the tensor live. But CPython doesn't 10368 # provide enough hooks to do this. So it will go dead and x 10369 # will transmute into an undefined tensor. Not great, but the 10370 # best we can do. 10371 del y 10372 10373 self.assertRaises(RuntimeError, lambda: x.sigmoid()) 10374 10375 @skipIfTorchDynamo("https://github.com/pytorch/torchdynamo/issues/1993") 10376 def test_storage_dead_weak_ref(self): 10377 x = torch.UntypedStorage(2) 10378 w_x = weakref.ref(x) 10379 y = torch.tensor(x) 10380 del x 10381 10382 x = w_x() 10383 # Ideally, x would keep the storage live. But CPython doesn't 10384 # provide enough hooks to do this. So it will go dead and x 10385 # will transmute into storage with null StorageImpl. Not great, but the 10386 # best we can do. 
    @skipIfTorchDynamo("https://github.com/pytorch/torchdynamo/issues/1993")
    def test_tensor_dead_weak_ref(self):
        x = torch.empty(2)
        w_x = weakref.ref(x)
        y = torch.empty(2)
        y.grad = x
        del x

        x = w_x()
        # Ideally, x would keep the tensor live. But CPython doesn't
        # provide enough hooks to do this. So it will go dead and x
        # will transmute into an undefined tensor. Not great, but the
        # best we can do.
        del y

        self.assertRaises(RuntimeError, lambda: x.sigmoid())

    @skipIfTorchDynamo("https://github.com/pytorch/torchdynamo/issues/1993")
    def test_storage_dead_weak_ref(self):
        x = torch.UntypedStorage(2)
        w_x = weakref.ref(x)
        y = torch.tensor(x)
        del x

        x = w_x()
        # Ideally, x would keep the storage live. But CPython doesn't
        # provide enough hooks to do this. So it will go dead and x
        # will transmute into a storage with a null StorageImpl. Not great,
        # but the best we can do.
        del y

        self.assertRaisesRegex(RuntimeError, "Got a null Storage", lambda: x[0])
        self.assertRaisesRegex(RuntimeError, "Got a null Storage", lambda: x.float())

    def test_tensor_resurrected_weak_ref(self):
        x = torch.empty(2)
        w_x = weakref.ref(x)
        y = torch.empty(2)
        y.grad = x
        del x

        x = w_x()
        # Use this to manually fix weak references after dereferencing them
        x._fix_weakref()
        del y
        x.sigmoid()

    def test_storage_resurrected_weak_ref(self):
        x = torch.UntypedStorage(2)
        w_x = weakref.ref(x)
        y = torch.tensor(x)
        del x

        x = w_x()
        # Use this to manually fix weak references after dereferencing them
        x._fix_weakref()
        del y
        x.float()

    @skipIfTorchDynamo("https://github.com/pytorch/torchdynamo/issues/1993")
    def test_tensor_fix_weakref_no_leak(self):
        called = False

        a = torch.randn(1)

        def callback(w):
            nonlocal called
            called = True

        wa = weakref.ref(a, callback)
        a._fix_weakref()
        del a

        self.assertTrue(called)

    @skipIfTorchDynamo("https://github.com/pytorch/torchdynamo/issues/1993")
    def test_storage_fix_weakref_no_leak(self):
        called = False

        a = torch.UntypedStorage(1)

        def callback(w):
            nonlocal called
            called = True

        wa = weakref.ref(a, callback)
        a._fix_weakref()
        del a

        self.assertTrue(called)
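
    # (Illustrative sketch, not part of the original suite; the _sketch_
    # helper name is ours.) A minimal single-case version of the exhaustive
    # bmm test below: for batched matmul, torch.bmm on (B, M, N) x (B, N, O)
    # inputs should match numpy's @ operator on the same data.
    def _sketch_bmm_matches_numpy(self):
        b1 = torch.randn(2, 3, 4)
        b2 = torch.randn(2, 4, 5)
        expect = torch.from_numpy(b1.numpy() @ b2.numpy())
        self.assertEqual(expect, torch.bmm(b1, b2))
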
    # FIXME: move to test_linalg
    @torch.inference_mode()
    def test_bmm_multithreaded(self):
        device = 'cpu'
        num_threads = torch.get_num_threads()

        torch.set_num_threads(4)
        batch_sizes = [1, 10]
        M, N, O = 23, 8, 12
        dtype = torch.float32
        numpy_dtype = dtype

        def invert_perm(p):
            d = {x: i for i, x in enumerate(p)}
            return (d[0], d[1], d[2])

        def generate_inputs(num_batches):
            # transposed tensors
            for perm1, perm2 in itertools.product(itertools.permutations((0, 1, 2)), repeat=2):
                b1 = make_tensor((num_batches, M, N), dtype=dtype, device=device, low=-1, high=1)
                b2 = make_tensor((num_batches, N, O), dtype=dtype, device=device, low=-1, high=1)
                b1 = b1.permute(perm1).contiguous().permute(invert_perm(perm1))
                b2 = b2.permute(perm2).contiguous().permute(invert_perm(perm2))
                yield b1, b2
            # broadcasting tensors
            for b1, b2, b3, b4, b5, b6 in itertools.product((True, False), repeat=6):
                shape1 = (num_batches if b1 else 1, M if b2 else 1, N if b3 else 1)
                shape2 = (num_batches if b4 else 1, N if b5 else 1, O if b6 else 1)
                b1 = make_tensor(shape1, dtype=dtype, device=device, low=-1, high=1).expand(num_batches, M, N)
                b2 = make_tensor(shape2, dtype=dtype, device=device, low=-1, high=1).expand(num_batches, N, O)
                yield b1, b2
            # zero-sized tensors
            for z1, z2, z3, z4 in itertools.product((True, False), repeat=4):
                shape1 = (num_batches if z1 else 0, M if z2 else 0, N if z3 else 0)
                shape2 = (num_batches if z1 else 0, N if z3 else 0, O if z4 else 0)
                b1 = torch.randn(shape1, dtype=dtype, device=device)
                b2 = torch.randn(shape2, dtype=dtype, device=device)
                yield b1, b2

        try:
            for num_batches in batch_sizes:
                for (b1, b2), perm3 in itertools.product(generate_inputs(num_batches), itertools.permutations((0, 1, 2))):
                    res1 = torch.bmm(b1, b2)
                    res2 = torch.full((num_batches, M, O), math.nan, dtype=dtype, device=device) \
                        .permute(perm3).contiguous().permute(invert_perm(perm3))
                    torch.bmm(b1, b2, out=res2)
                    expect = torch.from_numpy(
                        b1.to(numpy_dtype).cpu().numpy() @ b2.to(numpy_dtype).cpu().numpy()).to(device=device, dtype=dtype)
                    self.assertEqual(expect, res1)
                    self.assertEqual(expect, res2)
        finally:
            torch.set_num_threads(num_threads)

    def test_conj_neg_tolist(self):
        x = torch.randn(2, dtype=torch.cfloat)
        y1 = x.conj()
        y1_expect = x.conj_physical()
        y2 = y1.imag
        self.assertEqual(y1, y1_expect.tolist())
        self.assertEqual(y2, y1_expect.imag.tolist())

    @unittest.skipIf(torch.backends.cuda.is_built(), "Skipped for cuda-enabled build")
    def test_no_cuda_monkeypatch(self):
        # Note that this is not in test_cuda.py as this whole file is skipped when cuda
        # is not available.
        with self.assertRaisesRegex(RuntimeError, "Tried to instantiate dummy base class Stream"):
            torch.cuda.Stream()

        with self.assertRaisesRegex(RuntimeError, "Tried to instantiate dummy base class Event"):
            torch.cuda.Event()

        with self.assertRaisesRegex(RuntimeError, "Tried to instantiate dummy base class CUDAGraph"):
            torch.cuda.graphs.CUDAGraph()

    def test_tensor_where_scalar(self):
        a = torch.arange(4.0)
        not_zero = 0.001

        # b is computed via the torch.where function, with not_zero as a scalar argument
        b = torch.where(a != 0, a, not_zero)
        # c is computed via the Tensor.where method, with not_zero as a scalar argument
        c = a.where(a != 0, not_zero)

        self.assertEqual(b, c)

    def test_data_ptr_of_empty_tensor_with_storage(self):
        t = torch.empty((2, 2))
        self.assertNotEqual(t.data_ptr(), 0)
        t.resize_((0, 2))
        self.assertEqual(t.data_ptr(), 0)

    def test_data_ptr_of_empty_view_with_storage(self):
        t = torch.empty((2, 2))
        self.assertNotEqual(t.data_ptr(), 0)
        t2 = t[0:0].view(0, 1)
        self.assertEqual(t2.data_ptr(), 0)

    def test_size_stride(self) -> None:
        t = torch.rand(2, 3, dtype=torch.float32)
        self.assertEqual(t.size(0), 2)
        self.assertEqual(t.size(dim=None), torch.Size([2, 3]))
        self.assertEqual(t.stride(dim=None), torch.Size([3, 1]))
        self.assertEqual(t.t().stride(), torch.Size([1, 3]))

    def test_invalid_arg_error_handling(self) -> None:
        """ Tests that errors from old TH functions are propagated back """
        for invalid_val in [-1, 2**65]:
            self.assertRaises(RuntimeError, lambda: torch.set_num_threads(invalid_val))
            self.assertRaises(RuntimeError, lambda: torch.set_num_interop_threads(invalid_val))

    def _get_tensor_prop(self, t):
        preserved = (
            id(t),
            # Refcount values get modified by Dynamo resume frames
            0 if TEST_WITH_TORCHDYNAMO else sys.getrefcount(t),
        )
        slotnames = copyreg._slotnames(t.__class__)
        moved = (
            slotnames,
            id(t.__dict__),
            tuple(t.__dict__.keys()),
            [getattr(t, name, None) for name in slotnames]
        )
        return preserved, moved
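
    # (Illustrative sketch, not part of the original suite; the _sketch_
    # helper name is ours.) The contract checked by _checked_swap below:
    # torch.utils.swap_tensors exchanges the payloads of two tensors while
    # each Python object keeps its identity.
    def _sketch_swap_tensors_contract(self):
        t1, t2 = torch.zeros(2), torch.ones(2)
        id1, id2 = id(t1), id(t2)
        torch.utils.swap_tensors(t1, t2)
        self.assertEqual(id(t1), id1)        # same Python objects ...
        self.assertEqual(id(t2), id2)
        self.assertEqual(t1, torch.ones(2))  # ... with swapped contents
        self.assertEqual(t2, torch.zeros(2))
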
    def _checked_swap(self, t1, t2):
        t1_pres, t1_moved = self._get_tensor_prop(t1)
        t2_pres, t2_moved = self._get_tensor_prop(t2)

        torch.utils.swap_tensors(t1, t2)

        new_t1_pres, new_t1_moved = self._get_tensor_prop(t1)
        new_t2_pres, new_t2_moved = self._get_tensor_prop(t2)
        self.assertEqual(t1_pres, new_t1_pres)
        self.assertEqual(t2_pres, new_t2_pres)
        self.assertEqual(t1_moved, new_t2_moved)
        self.assertEqual(t2_moved, new_t1_moved)

        # Tests that the PyObject slots on TensorImpl are correctly swapped:
        # when a function called on a swapped tensor returns that tensor
        # unchanged, the returned value (which is produced by returning the
        # reference to the PyObject stored in the TensorImpl's PyObjectSlot)
        # must still be the right Python object.
        self.assertEqual(id(t1.fill_(0.5)), id(t1))
        self.assertEqual(id(t2.fill_(0.5)), id(t2))

    @unittest.skipIf(TEST_WITH_TORCHDYNAMO, "Dynamo adds weakrefs")
    def test_swap_basic(self):
        ts = [
            torch.rand(2),
            torch.rand(3, 3),
            torch.empty(3, dtype=torch.int),
            TwoTensor(torch.rand(4), torch.rand(4))
        ]

        for t1, t2 in itertools.combinations(ts, 2):
            t1 = t1.clone()
            t2 = t2.clone()
            t2.foo = "bar"
            holder = []
            holder.append(t1)

            self._checked_swap(t1, t2)

            self.assertIs(holder[0], t1)
            self.assertEqual(t1.foo, "bar")

            if t1.is_floating_point():
                t3 = t1.clone().detach().requires_grad_(True)
                out = t3 * 2
                torch.utils.swap_tensors(t3, t2)
                with self.assertRaisesRegex(RuntimeError, "AccumulateGrad node that was poisoned by swap_tensors"):
                    out.sum().backward()

            wr = weakref.ref(t1)
            with self.assertRaisesRegex(RuntimeError, "has weakref"):
                torch.utils.swap_tensors(t1, t2)

    @unittest.skipIf(TEST_WITH_TORCHDYNAMO, "Dynamo adds weakrefs")
    def test_swap_fail_slots(self):
        class MyTwoTensor(TwoTensor):
            __slots__ = ("a", "b")

        class MyTwoTensor2(TwoTensor):
            __slots__ = ("b", "a")

        class MyTwoTensor3(TwoTensor):
            __slots__ = ("a", "b", "c", "d")

        class MyTwoTensor4(TwoTensor):
            __slots__ = ("a", "c")

        t1 = torch.rand(4)
        t2 = TwoTensor(torch.rand(4), torch.rand(4))
        t3 = MyTwoTensor(torch.rand(4), torch.rand(4))
        t4 = MyTwoTensor(torch.rand(4), torch.rand(4))
        t5 = MyTwoTensor2(torch.rand(4), torch.rand(4))
        t6 = MyTwoTensor3(torch.rand(4), torch.rand(4))
        t7 = MyTwoTensor3(torch.rand(4), torch.rand(4))
        t8 = MyTwoTensor4(torch.rand(4), torch.rand(4))

        self._checked_swap(t1, t2)
        with self.assertRaisesRegex(RuntimeError, "Cannot swap t1 and t2 if they have different slots"):
            torch.utils.swap_tensors(t1, t3)
        with self.assertRaisesRegex(RuntimeError, "Cannot swap t1 and t2 if they have different slots"):
            torch.utils.swap_tensors(t2, t3)
        with self.assertRaisesRegex(RuntimeError, "Cannot swap t1 and t2 if they have different slots"):
            torch.utils.swap_tensors(t2, t8)
        self._checked_swap(t3, t4)
        self._checked_swap(t3, t5)
        with self.assertRaisesRegex(RuntimeError, "Cannot swap t1 and t2 if they have different slots"):
            torch.utils.swap_tensors(t3, t6)
        t3.c = "foo"
        t4.d = "bar"
        self._checked_swap(t3, t4)
        self.assertEqual(t4.c, "foo")
        self.assertEqual(t3.d, "bar")
        t6.c = "cat"
        t7.d = "dog"
        self._checked_swap(t6, t7)

    @unittest.skipIf(torch.cuda.is_available(), "Test specific for CPU")
    def test_bf16_supported_on_cpu(self):
        self.assertFalse(torch.cuda.is_bf16_supported())
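
# (Illustrative sketch, not part of the original suite; the _example_
# helper name is ours.) The block below generates tests for a single rule:
# a negative dimension index d on an ndim-dimensional tensor is interpreted
# as d + ndim.
def _example_negative_dim_wrapping():
    x = torch.randn(10, 20, 30)                      # ndim == 3
    assert torch.equal(x.sum(-1), x.sum(2))          # -1 wraps to -1 + 3 == 2
    assert torch.equal(x.transpose(-2, -1), x.transpose(1, 2))
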
# The following block extends TestTorch with negative dim wrapping tests
# FIXME: replace these with OpInfo sample inputs or systemic OpInfo tests
# Functions to test negative dimension wrapping
METHOD = 1
INPLACE_METHOD = 2
FUNCTIONAL = 4
DIM_ARG: None = None

def make_neg_dim_test(name, tensor_arg, arg_constr, types, extra_dim=0):
    def neg_dim_test(self):
        if isinstance(tensor_arg, list):
            assert METHOD not in types and INPLACE_METHOD not in types
            x = [torch.randn(arg) for arg in tensor_arg]
            ndim = len(tensor_arg[-1])
        else:
            x = torch.randn(*tensor_arg)
            ndim = len(tensor_arg)
        ndim += extra_dim

        n_dim_to_test = sum(e is DIM_ARG for e in arg_constr())

        for dims_val in combinations(range(ndim), n_dim_to_test):
            arg = arg_constr()
            arg_neg = copy.deepcopy(arg)
            idx = 0
            for i, v in enumerate(arg):
                if v is DIM_ARG:
                    arg[i] = dims_val[idx]
                    arg_neg[i] = dims_val[idx] - ndim
                    idx += 1

            if METHOD in types:
                a = getattr(x, name)(*arg)
                b = getattr(x, name)(*arg_neg)
                self.assertEqual(a, b)

            if INPLACE_METHOD in types:
                a = x.clone()
                getattr(a, name + '_')(*arg)
                b = x.clone()
                getattr(b, name + '_')(*arg_neg)
                self.assertEqual(a, b)

            if FUNCTIONAL in types:
                a = getattr(torch, name)(x, *arg)
                b = getattr(torch, name)(x, *arg_neg)
                self.assertEqual(a, b)

    return neg_dim_test

def idx_tensor(size, max_val):
    return torch.LongTensor(*size).random_(0, max_val - 1)

def add_neg_dim_tests():
    neg_dim_tests = [
        ('narrow', (10, 20, 30), lambda: [DIM_ARG, 0, 5], [METHOD]),
        ('transpose', (10, 20, 30), lambda: [DIM_ARG, DIM_ARG], [METHOD, INPLACE_METHOD, FUNCTIONAL]),
        ('size', (10, 20, 30), lambda: [DIM_ARG], [METHOD]),
        ('cat', [(2, 3, 4), (2, 3, 4)], lambda: [DIM_ARG], [FUNCTIONAL]),
        ('chunk', (10, 20, 30), lambda: [5, DIM_ARG], [METHOD, FUNCTIONAL]),
        ('gather', (10, 20), lambda: [DIM_ARG, idx_tensor((10, 20), 10)], [METHOD, FUNCTIONAL]),
        ('index_select', (10, 10), lambda: [DIM_ARG, idx_tensor((10,), 10)], [METHOD, FUNCTIONAL]),
        ('split', (10, 20), lambda: [5, DIM_ARG], [METHOD, FUNCTIONAL]),
        ('squeeze', (10, 1, 20, 1), lambda: [DIM_ARG], [METHOD, INPLACE_METHOD, FUNCTIONAL]),
        ('unbind', (2, 3, 4), lambda: [DIM_ARG], [FUNCTIONAL]),
        ('unsqueeze', (10, 20), lambda: [DIM_ARG], [METHOD, INPLACE_METHOD, FUNCTIONAL], 1),
        ('logcumsumexp', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('cumprod', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('cumsum', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('cummax', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('cummin', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('mean', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('median', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('nanmedian', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('mode', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('norm', (10, 20), lambda: [2, DIM_ARG], [METHOD, FUNCTIONAL]),
        ('prod', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('std', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('sum', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('var', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('kthvalue', (10, 20), lambda: [3, DIM_ARG], [METHOD, FUNCTIONAL]),
        ('max', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('min', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('sort', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('topk', (10, 20), lambda: [5, DIM_ARG], [METHOD, FUNCTIONAL]),
        ('renorm', (10, 20), lambda: [2, DIM_ARG, 1], [METHOD, INPLACE_METHOD, FUNCTIONAL]),
        ('index_add', (10, 10), lambda: [DIM_ARG, idx_tensor((10,), 10), torch.randn(10, 10)], [INPLACE_METHOD]),
        ('index_copy', (10, 10), lambda: [DIM_ARG, idx_tensor((10,), 10), torch.randn(10, 10)], [INPLACE_METHOD]),
        ('index_fill', (10, 10), lambda: [DIM_ARG, idx_tensor((10,), 10), 12], [INPLACE_METHOD]),
        ('scatter', (10, 10), lambda: [DIM_ARG, idx_tensor((10, 10), 10), torch.randn(10, 10)], [INPLACE_METHOD]),
        ('select', (10, 20), lambda: [DIM_ARG, 3], [METHOD]),
        ('unfold', (10, 20), lambda: [DIM_ARG, 5, 2], [METHOD]),
    ]

    for decl in neg_dim_tests:
        if len(decl) == 4:
            name, tensor_arg, arg_constr, types = decl
            extra_dim = 0
        elif len(decl) == 5:
            name, tensor_arg, arg_constr, types, extra_dim = decl

        test_name = 'test_' + name + '_neg_dim'

        assert not hasattr(TestTorch, test_name), "Duplicated test name: " + test_name
        setattr(TestTorch, test_name, make_neg_dim_test(name, tensor_arg, arg_constr, types, extra_dim))
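
# (Illustrative sketch, not part of the original suite; the _example_
# helper name is ours.) A hand-expanded version of what make_neg_dim_test
# generates for the 'transpose' entry above: every positive dim pair must
# agree with its wrapped negative form.
def _example_expanded_transpose_neg_dim_test():
    x = torch.randn(10, 20, 30)
    ndim = 3
    # combinations(range(ndim), 2) mirrors how the generated test enumerates
    # the two DIM_ARG positions.
    for d0, d1 in combinations(range(ndim), 2):
        assert torch.equal(x.transpose(d0, d1), x.transpose(d0 - ndim, d1 - ndim))
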
# TODO: these empty classes are temporarily instantiated for XLA compatibility
# once XLA updates its test suite, they should be removed
class TestViewOps(TestCase):
    pass

class TestTensorDeviceOps(TestCase):
    pass

# Generates tests
# Note: test generation must be done at file scope, not within main, or
# pytest will fail.
add_neg_dim_tests()
instantiate_device_type_tests(TestViewOps, globals())
instantiate_device_type_tests(TestVitalSignsCuda, globals())
instantiate_device_type_tests(TestTensorDeviceOps, globals())
instantiate_device_type_tests(TestTorchDeviceType, globals())
instantiate_device_type_tests(TestDevicePrecision, globals(), except_for='cpu')

if __name__ == '__main__':
    TestCase._default_dtype_check_enabled = True
    run_tests()