# Owner(s): ["module: nn"] import itertools import math import unittest import warnings from itertools import product import torch import torch.autograd.forward_ad as fwAD import torch.backends.cudnn as cudnn import torch.nn as nn import torch.nn.functional as F from torch.testing import make_tensor from torch.testing._internal.common_cuda import ( TEST_CUDA, TEST_CUDNN, tf32_is_not_fp32, tf32_on_and_off, ) from torch.testing._internal.common_device_type import ( disablecuDNN, disableMkldnn, dtypes, dtypesIfCUDA, instantiate_device_type_tests, largeTensorTest, onlyCPU, onlyCUDA, onlyNativeDeviceTypes, precisionOverride, skipCPUIfNoMkldnn, skipCUDAIfCudnnVersionLessThan, skipCUDAIfMiopen, skipCUDAIfNoCudnn, skipCUDAIfNoMiopen, skipCUDAIfNotMiopenSuggestNHWC, skipCUDAIfRocm, skipCUDAIfRocmVersionLessThan, skipMeta, ) from torch.testing._internal.common_dtype import ( floating_and_complex_types_and, floating_types_and, ) from torch.testing._internal.common_nn import _test_module_empty_input, NNTestCase from torch.testing._internal.common_utils import ( download_file, dtype2prec_DONTUSE, gradcheck, GRADCHECK_NONDET_TOL, gradgradcheck, instantiate_parametrized_tests, parametrize as parametrize_test, run_tests, set_default_dtype, skipIfNotMiopenSuggestNHWC, skipIfRocmVersionLessThan, subtest, TEST_SCIPY, TEST_WITH_ROCM, ) AMPERE_OR_ROCM = TEST_WITH_ROCM or tf32_is_not_fp32() if TEST_SCIPY: import scipy.ndimage import scipy.signal class TestConvolutionNN(NNTestCase): _do_cuda_memory_leak_check = True _do_cuda_non_default_stream = True def test_conv_backcompat(self): from torch.serialization import SourceChangeWarning # This file was generated by running on PyTorch 1.0.1 on Python 2: # # import torch # from torch import nn # m = nn.Conv2d(1, 1, 1) # torch.save(m, 'legacy_conv2d.pt') # # NB: This Pickle also contains some Unicode data! path = download_file("https://download.pytorch.org/test_data/legacy_conv2d.pt") with warnings.catch_warnings(): warnings.simplefilter("ignore", SourceChangeWarning) # weights_only=False as this is legacy code that saves the model m = torch.load(path, encoding="utf-8", weights_only=False) input = torch.randn((1, 1, 1, 1), dtype=torch.float) self.assertEqual(m(input).size(), (1, 1, 1, 1)) def test_invalid_conv1d(self): for dtype in [ torch.half, torch.bfloat16, torch.float, torch.double, torch.cfloat, torch.cdouble, ]: module = nn.Conv1d( in_channels=3, out_channels=33, kernel_size=10, stride=1, bias=True ).to(dtype) input = torch.randn(1, 3, 4).to(dtype) with self.assertRaisesRegex( RuntimeError, r"Calculated padded input size per channel: \(4\). " + r"Kernel size: \(10\). Kernel size can\'t be greater than actual input size", ): module(input) # Negative stride check module = nn.Conv1d( in_channels=3, out_channels=6, kernel_size=3, stride=-1, bias=True ).to(dtype) input = torch.randn(1, 3, 4).to(dtype) with self.assertRaisesRegex( RuntimeError, "non-positive stride is not supported" ): module(input) def test_mismatch_shape_conv2d(self): for dtype in (torch.float, torch.cfloat): x = torch.randn(1, 10, 1, 28, 28, dtype=dtype) w = torch.randn(6, 1, 5, 5, dtype=dtype) with self.assertRaisesRegex( RuntimeError, r"Expected 3D \(unbatched\) or 4D \(batched\) input to conv2d, but got " + r"input of size: \[1, 10, 1, 28, 28\]", ): F.conv2d(x, w) def test_conv2d_discontiguous_weight(self): for dtype in (torch.float, torch.cfloat): # Test for https://github.com/pytorch/pytorch/issues/55781 x = torch.ones(64, 16, 16, 16, dtype=dtype) weight = ( torch.arange(0, 1.0, 1 / 2.0**10) .reshape(32, 16, 1, 2) .to(dtype)[:, :, :, ::2] ) self.assertFalse(weight.is_contiguous()) y = torch.nn.functional.conv2d(x, weight, None) if torch.backends.mkldnn.is_available(): # Disable MKLDNN explicitly, so that either NNPACK or THCNN will be used with torch.backends.mkldnn.flags(enabled=False): y_ = torch.nn.functional.conv2d(x, weight, None) self.assertEqual(y, y_) self.assertEqual(y.sum(), 4186112.0) def test_invalid_conv2d(self): for dtype in [ torch.half, torch.bfloat16, torch.float, torch.double, torch.cfloat, torch.cdouble, ]: module = torch.nn.Conv2d(1, 1, kernel_size=3, dilation=2, stride=2).to( dtype ) input = torch.empty(1, 1, 4, 4).to(dtype) self.assertRaises(RuntimeError, lambda: module(input)) module = nn.Conv2d( in_channels=3, out_channels=33, kernel_size=10, stride=1, bias=True ) input = torch.randn(1, 3, 1, 1) with self.assertRaisesRegex( RuntimeError, r"Calculated padded input size per channel: \(1 x 1\). " + r"Kernel size: \(10 x 10\). Kernel size can\'t be greater than actual input size", ): module(input) # Negative stride check module = nn.Conv2d( in_channels=3, out_channels=6, kernel_size=4, stride=-1, bias=True ).to(dtype) input = torch.randn(1, 3, 4, 4).to(dtype) with self.assertRaisesRegex( RuntimeError, "non-positive stride is not supported" ): module(input) # Zero stride check module = nn.Conv2d( in_channels=3, out_channels=6, kernel_size=4, stride=0, bias=True ).to(dtype) input = torch.randn(1, 3, 4, 4).to(dtype) with self.assertRaisesRegex( RuntimeError, "non-positive stride is not supported" ): module(input) def test_invalid_conv3d(self): for dtype in [ torch.half, torch.bfloat16, torch.float, torch.double, torch.cfloat, torch.cdouble, ]: module = torch.nn.Conv3d(1, 1, kernel_size=3, dilation=2, stride=2).to( dtype ) input = torch.empty(1, 1, 4, 4, 4).to(dtype) self.assertRaises(RuntimeError, lambda: module(input)) # Negative stride check module = torch.nn.Conv3d(1, 1, kernel_size=3, stride=-2) input = torch.empty(1, 1, 4, 4, 4) with self.assertRaisesRegex( RuntimeError, "non-positive stride is not supported" ): module(input) def test_conv_invalid_groups(self): with self.assertRaisesRegex(ValueError, "groups must be a positive integer"): torch.nn.Conv1d(1, 1, kernel_size=3, dilation=2, stride=2, groups=0) with self.assertRaisesRegex(ValueError, "groups must be a positive integer"): torch.nn.Conv2d(1, 1, kernel_size=3, dilation=2, stride=2, groups=-1) with self.assertRaisesRegex(ValueError, "groups must be a positive integer"): torch.nn.Conv3d(1, 1, kernel_size=3, dilation=2, stride=2, groups=-2) def test_Conv1d_module_same_padding(self): # Compare module against functional: without strides/dilation, asymmetric padding x = torch.rand(1, 1, 20) module = nn.Conv1d( in_channels=1, out_channels=1, kernel_size=10, padding="same" ) expect = F.conv1d(x, module.weight, module.bias, padding="same") self.assertEqual(expect, module(x)) # Test dilation, symmetric padding module = nn.Conv1d( in_channels=1, out_channels=1, kernel_size=10, padding="same", dilation=2 ) expect = F.conv1d(x, module.weight, module.bias, padding="same", dilation=2) self.assertEqual(expect, module(x)) # Test non-zero padding_mode, requiring explicit padding module = nn.Conv1d( in_channels=1, out_channels=1, kernel_size=10, padding="same", padding_mode="replicate", ) x_padded = F.pad(x, [4, 5], mode="replicate") expect = F.conv1d(x_padded, module.weight, module.bias, padding="valid") self.assertEqual(expect, module(x)) self.assertEqual(x.size(), expect.size()) # Test connstruction with invalid padding string raises with self.assertRaisesRegex(ValueError, "Invalid padding string"): module = nn.Conv1d( in_channels=3, out_channels=33, kernel_size=10, padding="foo" ) # Test connstruction with same padding and strides raises with self.assertRaisesRegex(ValueError, "padding='same'"): module = nn.Conv1d( in_channels=3, out_channels=33, kernel_size=10, padding="same", stride=2 ) def test_Conv2d_module_same_padding(self): # Compare module against functional: # without strides/dilation, both symmetric and asymmetric padding x = torch.rand(1, 1, 9, 20) module = nn.Conv2d( in_channels=1, out_channels=1, kernel_size=(5, 10), padding="same" ) expect = F.conv2d(x, module.weight, module.bias, padding="same") self.assertEqual(expect, module(x)) # with dilation, symmetric padding module = nn.Conv2d( in_channels=1, out_channels=1, kernel_size=(3, 4), padding="same", dilation=(1, 2), ) expect = F.conv2d( x, module.weight, module.bias, padding="same", dilation=(1, 2) ) self.assertEqual(expect, module(x)) # Test non-zero padding_mode, requiring explicit padding module = nn.Conv2d( in_channels=1, out_channels=1, kernel_size=(3, 4), padding="same", padding_mode="reflect", ) x_padded = F.pad(x, [1, 2, 1, 1], mode="reflect") expect = F.conv2d(x_padded, module.weight, module.bias, padding="valid") self.assertEqual(expect, module(x)) self.assertEqual(x.size(), expect.size()) # Test connstruction with invalid padding string raises with self.assertRaisesRegex(ValueError, "Invalid padding string"): module = nn.Conv2d( in_channels=3, out_channels=33, kernel_size=10, padding="foo" ) # Test connstruction with same padding and strides raises with self.assertRaisesRegex(ValueError, "padding='same'"): module = nn.Conv2d( in_channels=3, out_channels=33, kernel_size=10, padding="same", stride=2 ) with self.assertRaisesRegex(ValueError, "padding='same'"): module = nn.Conv2d( in_channels=3, out_channels=33, kernel_size=10, padding="same", stride=(1, 3), ) with self.assertRaisesRegex(ValueError, "padding='same'"): module = nn.Conv2d( in_channels=3, out_channels=33, kernel_size=10, padding="same", stride=(4, 1), ) def test_Conv3d_module_same_padding(self): # Compare module against functional: x = torch.rand(1, 1, 4, 4, 4) # without dilation, both symmetric and asymmetric padding module = nn.Conv3d( in_channels=1, out_channels=1, kernel_size=(2, 3, 4), padding="same" ) expect = F.conv3d(x, module.weight, module.bias, padding="same") self.assertEqual(expect, module(x)) # with dilation, both symmetric and asymmetric padding module = nn.Conv3d( in_channels=1, out_channels=1, kernel_size=(2, 3, 4), padding="same", dilation=(3, 2, 1), ) expect = F.conv3d( x, module.weight, module.bias, padding="same", dilation=(3, 2, 1) ) self.assertEqual(expect, module(x)) # Test non-zero padding_mode, requiring explicit padding module = nn.Conv3d( in_channels=1, out_channels=1, kernel_size=(2, 3, 4), padding="same", padding_mode="circular", ) x_padded = F.pad(x, [1, 2, 1, 1, 0, 1], mode="circular") expect = F.conv3d(x_padded, module.weight, module.bias, padding="valid") self.assertEqual(expect, module(x)) self.assertEqual(x.size(), expect.size()) # Test connstruction with invalid padding string raises with self.assertRaisesRegex(ValueError, "Invalid padding string"): module = nn.Conv3d( in_channels=3, out_channels=33, kernel_size=10, padding="foo" ) # Test connstruction with same padding and strides raises with self.assertRaisesRegex(ValueError, "padding='same'"): module = nn.Conv2d( in_channels=3, out_channels=33, kernel_size=10, padding="same", stride=2 ) with self.assertRaisesRegex(ValueError, "padding='same'"): module = nn.Conv2d( in_channels=3, out_channels=33, kernel_size=10, padding="same", stride=(1, 1, 3), ) with self.assertRaisesRegex(ValueError, "padding='same'"): module = nn.Conv2d( in_channels=3, out_channels=33, kernel_size=10, padding="same", stride=(1, 4, 1), ) with self.assertRaisesRegex(ValueError, "padding='same'"): module = nn.Conv2d( in_channels=3, out_channels=33, kernel_size=10, padding="same", stride=(5, 1, 1), ) @unittest.skipIf(not TEST_CUDA, "CUDA not available") def test_thnn_conv_strided_padded_dilated(self): for convfn, dims, transposed in ( (torch.nn.functional.conv2d, 2, False), (torch.nn.functional.conv_transpose2d, 2, True), (torch.nn.functional.conv3d, 3, False), (torch.nn.functional.conv_transpose3d, 3, True), ): for stride, padding, dilation in ( (2, 0, 1), (1, 1, 1), (2, 1, 1), (1, 0, 2), ): kwargs = {"stride": stride, "padding": padding, "dilation": dilation} inp_shape = (1, 2) + dims * (4,) weight_shape = (2, 2) + dims * (1,) inputs = torch.randn( inp_shape, dtype=torch.double, device="cuda", requires_grad=True ) weight = torch.randn( weight_shape, dtype=torch.double, device="cuda", requires_grad=True ) bias = torch.randn( 2, dtype=torch.double, device="cuda", requires_grad=True ) with torch.backends.cudnn.flags(enabled=False): res = convfn(inputs, weight, bias, **kwargs) res_cpu = convfn(inputs.cpu(), weight.cpu(), bias.cpu(), **kwargs) self.assertEqual(res, res_cpu) with torch.backends.cudnn.flags(enabled=False): torch.autograd.gradcheck( lambda x, w, b: convfn(x, w, b, **kwargs), (inputs, weight, bias), ) torch.autograd.gradcheck( lambda x, w, b: convfn(x, w, b, **kwargs), (inputs.cpu(), weight.cpu(), bias.cpu()), ) def test_Conv2d_inconsistent_types(self): inputs = torch.randn(4, 1, 7, 7, dtype=torch.float) weights = torch.randn(1, 1, 3, 3, dtype=torch.double) # inconsistent types should raise an exception self.assertRaises(RuntimeError, lambda: nn.functional.conv2d(inputs, weights)) # but it should work with the same type nn.functional.conv2d(inputs.float(), weights.float()) @unittest.skipIf(not TEST_CUDA, "CUDA not available") def test_Conv2d_inconsistent_types_on_GPU_without_cudnn(self): inputs = torch.randn(4, 1, 7, 7, dtype=torch.float, device="cuda") weights = torch.randn(1, 1, 3, 3, dtype=torch.double, device="cuda") bias = torch.randn(1, dtype=torch.double, device="cuda") with torch.backends.cudnn.flags(enabled=False): # inconsistent types should raise an exception self.assertRaises( RuntimeError, lambda: nn.functional.conv2d(inputs, weights) ) self.assertRaises( RuntimeError, lambda: nn.functional.conv2d(inputs, weights.float(), bias), ) # but it should work with the same type nn.functional.conv2d(inputs.float(), weights.float(), bias.float()) def test_Conv2d_1x1(self): in_channels = 2 out_channels = 2 mod = torch.nn.Conv2d(2, 2, 1, bias=False).to(dtype=torch.double) input = torch.randn( 1, in_channels, 5, 5, requires_grad=True, dtype=torch.double ) for enabled in (False, True): with torch.backends.mkldnn.flags(enabled=enabled): gradcheck(F.conv2d, (input, mod.weight)) def test_Conv2d_OneDNN(self): def run_once(group_val=24, dilation=1): ifm = torch.ones([1, group_val, 6, 6], dtype=torch.float32) weights = torch.ones([group_val, 1, 3, 3], dtype=torch.float32) op = torch.nn.Conv2d( in_channels=group_val, out_channels=group_val, kernel_size=[3, 3], stride=[2, 2], padding=[1, 1], dilation=[dilation, dilation], groups=group_val, bias=False, padding_mode="zeros", ) op.weight.data = weights res = op(ifm) grad_in = torch.ones(res.shape, dtype=torch.float32) res.backward(grad_in) return op.weight.grad for gorup_val in (24, 48, 23, 25): for dilation in (1, 2): with torch.backends.mkldnn.flags(enabled=False): without_onednn = run_once(gorup_val, dilation) with torch.backends.mkldnn.flags(enabled=True): with_onednn = run_once(gorup_val, dilation) self.assertEqual(without_onednn, with_onednn) @unittest.skipIf(not TEST_CUDA, "CUDA not available") @unittest.skipIf(not TEST_CUDNN, "CUDNN not available") def test_cudnn_non_contiguous(self): x = torch.randn(192, 16, 50).cuda() x = x.permute(0, 2, 1).contiguous().permute(0, 2, 1) m = torch.nn.Conv1d( in_channels=16, out_channels=32, kernel_size=2, bias=True ).cuda() result = m(x) @unittest.skipIf(not TEST_CUDA, "CUDA not available") @unittest.skipIf(not TEST_CUDNN, "CUDNN not available") def test_cudnn_not_mutate_stride(self): weight = torch.randn(64, 64, 1, 1) x = torch.randn(2, 64, 10, 10).to(memory_format=torch.channels_last) weight_stride = weight.stride() def conv(x, weight): return torch.convolution( x, weight, stride=(1, 1), padding=(0, 0), dilation=(1, 1), transposed=False, output_padding=(0, 0), groups=1, bias=None, ) # should have run in nhwc without mutating input strides out_nhwc = conv(x, weight) self.assertEqual(weight.stride(), weight_stride) self.assertTrue(out_nhwc.is_contiguous(memory_format=torch.channels_last)) x = x.contiguous(memory_format=torch.contiguous_format) out_c = conv(x, weight) self.assertTrue(out_c.is_contiguous(memory_format=torch.contiguous_format)) self.assertEqual(out_c, out_nhwc) self.assertEqual(weight.stride(), weight_stride) @unittest.skipIf(not TEST_CUDA, "CUDA not available") @unittest.skipIf(not TEST_CUDNN, "CUDNN not available") def test_Conv2d_inconsistent_types_on_GPU_with_cudnn(self): inputs = torch.randn(4, 1, 7, 7, dtype=torch.float, device="cuda") weights = torch.randn(1, 1, 3, 3, dtype=torch.double, device="cuda") bias = torch.randn(1, dtype=torch.double, device="cuda") with torch.backends.cudnn.flags(enabled=True): # inconsistent types should raise an exception self.assertRaises( RuntimeError, lambda: nn.functional.conv2d(inputs, weights) ) self.assertRaises( RuntimeError, lambda: nn.functional.conv2d(inputs, weights.float(), bias), ) # but it should work with the same type nn.functional.conv2d(inputs.float(), weights.float(), bias.float()) def test_Conv2d_missing_argument(self): c = nn.Conv2d(3, 3, 3) self.assertRaises(TypeError, lambda: c(None)) def test_Conv2d_backward_twice(self): input = torch.randn(2, 3, 5, 5) c = nn.Conv2d(3, 3, 3) o1 = c(input) o1.sum().backward() self.assertRaisesRegex( RuntimeError, "Specify retain_graph=True", lambda: o1.sum().backward() ) def test_conv_modules_raise_error_on_incorrect_input_size(self): for dtype in [torch.half, torch.bfloat16, torch.double, torch.float]: modules = [ nn.Conv1d(3, 8, 3).to(dtype), nn.ConvTranspose1d(3, 8, 3).to(dtype), nn.Conv2d(3, 8, 3).to(dtype), nn.ConvTranspose2d(3, 8, 3).to(dtype), nn.Conv3d(3, 8, 3).to(dtype), nn.ConvTranspose3d(3, 8, 3).to(dtype), ] invalid_input_dims = [(1, 4), (1, 4), (2, 5), (2, 5), (3, 6), (3, 6)] for invalid_dims, module in zip(invalid_input_dims, modules): for dims in invalid_dims: input = torch.empty(torch.Size((3,) * dims)) self.assertRaises(RuntimeError, lambda: module(input)) def test_conv_shapecheck(self): def test(should_raise, module, input_size, dtype): input = torch.empty(3, *input_size).to(dtype) if should_raise: self.assertRaises(RuntimeError, lambda: module(input)) else: # just run it to ensure no exception raised. module(input) for dtype in [ torch.half, torch.bfloat16, torch.float, torch.double, torch.cfloat, torch.cdouble, ]: # Conv1d test(True, nn.Conv1d(1, 1, 3).to(dtype), (1, 2), dtype) test(True, nn.Conv1d(1, 1, 3, stride=2).to(dtype), (1, 2), dtype) test(False, nn.Conv1d(1, 1, 2).to(dtype), (1, 2), dtype) test(False, nn.Conv1d(1, 1, 2, stride=2).to(dtype), (1, 2), dtype) test( False, nn.Conv1d(1, 1, 3, stride=2, padding=1).to(dtype), (1, 2), dtype ) # Conv2d test(True, nn.Conv2d(1, 1, (3, 3)).to(dtype), (1, 2, 2), dtype) test(False, nn.Conv2d(1, 1, (3, 3)).to(dtype), (1, 3, 3), dtype) test(False, nn.Conv2d(1, 1, (3, 3), padding=1).to(dtype), (1, 2, 2), dtype) # Conv3D test(True, nn.Conv3d(1, 1, (3, 3, 3)).to(dtype), (1, 2, 2, 2), dtype) test(False, nn.Conv3d(1, 1, (3, 3, 3)).to(dtype), (1, 3, 3, 3), dtype) test( False, nn.Conv3d(1, 1, (3, 3, 3), padding=1).to(dtype), (1, 2, 2, 2), dtype, ) def test_ConvTranspose2d_output_size(self): m = nn.ConvTranspose2d(3, 4, 3, 3, 0, 2) i = torch.randn(2, 3, 6, 6) for h in range(15, 22): for w in range(15, 22): if 18 <= h <= 20 and 18 <= w <= 20: output = m(i, output_size=(h, w)) self.assertEqual(output.size()[2:], (h, w)) else: self.assertRaises(ValueError, lambda: m(i, (h, w))) def test_ConvTranspose2d_output_size_downsample_upsample(self): b, c, hid_c = 2, 3, 2 for h in range(13, 24): for w in range(13, 17): for k in range(2, 5): for d in range(1, 5): for s in range(1, 4): for p in range(3): conv = nn.Conv2d( in_channels=c, out_channels=hid_c, kernel_size=k, stride=s, padding=p, dilation=d, ) t_conv = nn.ConvTranspose2d( in_channels=hid_c, out_channels=c, kernel_size=k, stride=s, padding=p, dilation=d, ) i = torch.randn(b, c, h, w) out = t_conv(conv(i), output_size=i.shape) self.assertEqual(out.size()[2:], i.size()[2:]) def test_ConvTranspose3d_correct_output_size(self): # Check that ConvTranspose3d can take a 5d output_size. m = nn.ConvTranspose3d(2, 2, 2) i = torch.rand(1, 2, 1, 1, 1) out = m(i, output_size=(1, 2, 2, 2, 2)) @unittest.skipIf(not TEST_CUDA, "CUDA not available") def test_ConvTranspose2d_half_cublas_gemm(self): with torch.backends.cudnn.flags(enabled=False): inputs = torch.randn(1, 1, 16, 16, device="cuda", dtype=torch.half) deconv = ( nn.ConvTranspose2d(1, 1, 3, stride=2, padding=1, output_padding=1) .cuda() .half() ) output = deconv(inputs) output.mean().backward() # For https://github.com/pytorch/pytorch/pull/1273 # Almost identical to the above `test_Conv2d_naive_groups` @torch.backends.cudnn.flags(enabled=True, benchmark=False) @unittest.skipIf(TEST_WITH_ROCM, "Skipped on ROCm, since it is failing on ROCm 5.7") def test_Conv2d_groups_nobias(self): dev_dtypes = [("cpu", torch.float)] if TEST_CUDA: dev_dtypes += [("cuda", torch.float), ("cuda", torch.half)] if AMPERE_OR_ROCM: dev_dtypes += [("cuda", torch.bfloat16)] for device, dtype in dev_dtypes: m = nn.Conv2d(4, 4, kernel_size=3, groups=2, bias=False).to(device, dtype) i = torch.randn(2, 4, 6, 6, device=device, dtype=dtype, requires_grad=True) output = m(i) grad_output = torch.randn(2, 4, 4, 4, device=device, dtype=dtype) output.backward(grad_output) m1 = nn.Conv2d(2, 2, kernel_size=3, bias=False).to(device, dtype) m1.weight.data.copy_(m.weight.data[:2]) i1 = i.data[:, :2].contiguous().requires_grad_(True) output1 = m1(i1) output1.backward(grad_output[:, :2].contiguous()) m2 = nn.Conv2d(2, 2, kernel_size=3, bias=False).to(device, dtype) m2.weight.data.copy_(m.weight.data[2:]) i2 = i.data[:, 2:].contiguous().requires_grad_(True) output2 = m2(i2) output2.backward(grad_output[:, 2:].contiguous()) self.assertEqual(output, torch.cat([output1, output2], 1)) self.assertEqual( i.grad.data, torch.cat([i1.grad.data, i2.grad.data], 1), atol=dtype2prec_DONTUSE[dtype], rtol=0, ) self.assertEqual( m.weight.grad.data, torch.cat([m1.weight.grad.data, m2.weight.grad.data], 0), atol=1e-1 if dtype == torch.half else dtype2prec_DONTUSE[dtype], rtol=0, ) # Almost identical to the above `test_Conv2d_naive_groups` # Covering special case when group > 1, input-channel / group < 16 and output-channel is multiple of 16 # See also https://github.com/pytorch/pytorch/pull/18463#issuecomment-476563686 # and https://github.com/pytorch/pytorch/pull/18463#issuecomment-477001024 @torch.backends.cudnn.flags(enabled=True, benchmark=False) @unittest.skipIf(TEST_WITH_ROCM, "Skipped on ROCm, since it is failing on ROCm 5.7") def test_Conv2d_groups_nobias_v2(self): torch.manual_seed(123) dev_dtypes = [("cpu", torch.float)] if TEST_CUDA: dev_dtypes += [("cuda", torch.float), ("cuda", torch.half)] if AMPERE_OR_ROCM: dev_dtypes += [("cuda", torch.bfloat16)] for device, dtype in dev_dtypes: m = nn.Conv2d(4, 16, kernel_size=3, groups=2, bias=False).to(device, dtype) i = torch.randn(2, 4, 6, 6, device=device, dtype=dtype, requires_grad=True) output = m(i) grad_output = torch.randn(2, 16, 4, 4, device=device, dtype=dtype) output.backward(grad_output) m1 = nn.Conv2d(2, 8, kernel_size=3, bias=False).to(device, dtype) m1.weight.data.copy_(m.weight.data[:8]) i1 = i.data[:, :2].contiguous().requires_grad_(True) output1 = m1(i1) output1.backward(grad_output[:, :8].contiguous()) m2 = nn.Conv2d(2, 8, kernel_size=3, bias=False).to(device, dtype) m2.weight.data.copy_(m.weight.data[8:]) i2 = i.data[:, 2:].contiguous().requires_grad_(True) output2 = m2(i2) output2.backward(grad_output[:, 8:].contiguous()) self.assertEqual(output, torch.cat([output1, output2], 1)) self.assertEqual( i.grad.data, torch.cat([i1.grad.data, i2.grad.data], 1), atol=dtype2prec_DONTUSE[dtype], rtol=0, ) self.assertEqual( m.weight.grad.data, torch.cat([m1.weight.grad.data, m2.weight.grad.data], 0), atol=1e-1 if dtype == torch.half else dtype2prec_DONTUSE[dtype], rtol=0, ) # CPU-only test for group conv3d fast implementation using bmm # See: https://github.com/pytorch/pytorch/pull/36355 def test_Conv3d_groups_nobias(self): torch.manual_seed(123) m = nn.Conv3d(4, 16, kernel_size=3, groups=2, bias=False).to("cpu", torch.float) i = torch.randn( 2, 4, 6, 6, 6, device="cpu", dtype=torch.float, requires_grad=True ) output = m(i) grad_output = torch.randn(2, 16, 4, 4, 4, device="cpu", dtype=torch.float) output.backward(grad_output) m1 = nn.Conv3d(2, 8, kernel_size=3, bias=False).to("cpu", torch.float) m1.weight.data.copy_(m.weight.data[:8]) i1 = i.data[:, :2].contiguous().requires_grad_(True) output1 = m1(i1) output1.backward(grad_output[:, :8].contiguous()) m2 = nn.Conv3d(2, 8, kernel_size=3, bias=False).to("cpu", torch.float) m2.weight.data.copy_(m.weight.data[8:]) i2 = i.data[:, 2:].contiguous().requires_grad_(True) output2 = m2(i2) output2.backward(grad_output[:, 8:].contiguous()) self.assertEqual(output, torch.cat([output1, output2], 1)) self.assertEqual( i.grad.data, torch.cat([i1.grad.data, i2.grad.data], 1), atol=dtype2prec_DONTUSE[torch.float], rtol=0, ) self.assertEqual( m.weight.grad.data, torch.cat([m1.weight.grad.data, m2.weight.grad.data], 0), atol=dtype2prec_DONTUSE[torch.float], rtol=dtype2prec_DONTUSE[torch.float], ) def test_Conv3d_groups_wbias(self): torch.manual_seed(123) m = nn.Conv3d(4, 16, kernel_size=3, groups=2, bias=True).to("cpu", torch.float) i = torch.randn( 2, 4, 6, 6, 6, device="cpu", dtype=torch.float, requires_grad=True ) output = m(i) grad_output = torch.randn(2, 16, 4, 4, 4, device="cpu", dtype=torch.float) output.backward(grad_output) m1 = nn.Conv3d(2, 8, kernel_size=3, bias=True).to("cpu", torch.float) m1.weight.data.copy_(m.weight.data[:8]) m1.bias.data.copy_(m.bias.data[:8]) i1 = i.data[:, :2].contiguous().requires_grad_(True) output1 = m1(i1) output1.backward(grad_output[:, :8].contiguous()) m2 = nn.Conv3d(2, 8, kernel_size=3, bias=True).to("cpu", torch.float) m2.weight.data.copy_(m.weight.data[8:]) m2.bias.data.copy_(m.bias.data[8:]) i2 = i.data[:, 2:].contiguous().requires_grad_(True) output2 = m2(i2) output2.backward(grad_output[:, 8:].contiguous()) self.assertEqual(output, torch.cat([output1, output2], 1)) self.assertEqual( i.grad.data, torch.cat([i1.grad.data, i2.grad.data], 1), atol=dtype2prec_DONTUSE[torch.float], rtol=dtype2prec_DONTUSE[torch.float], ) self.assertEqual( m.weight.grad.data, torch.cat([m1.weight.grad.data, m2.weight.grad.data], 0), atol=dtype2prec_DONTUSE[torch.float], rtol=dtype2prec_DONTUSE[torch.float], ) self.assertEqual( m.bias.grad.data, torch.cat([m1.bias.grad.data, m2.bias.grad.data], 0), atol=dtype2prec_DONTUSE[torch.float], rtol=dtype2prec_DONTUSE[torch.float], ) def test_conv_tbc(self): with set_default_dtype(torch.double): inp = torch.randn(9, 4, 5, requires_grad=True) weight = torch.randn(3, 5, 6, requires_grad=True) bias = torch.randn(6, requires_grad=True) gradcheck( lambda i, w, b, pad: F.conv_tbc(i, w, b, pad), (inp, weight, bias, 3) ) @unittest.skipIf(not TEST_CUDA, "CUDA unavailable") @unittest.skipIf(not TEST_CUDNN, "needs cudnn") @skipIfRocmVersionLessThan((4, 3)) @skipIfNotMiopenSuggestNHWC def test_grouped_conv_cudnn_nhwc_support(self): # in order to catch the hols in grouped convolution in nhwc support for earlier cudnn version input = torch.randn((16, 16, 8, 8), dtype=torch.float16, device="cuda").to( memory_format=torch.channels_last ) weight = torch.randn((8, 4, 3, 3), dtype=torch.float16, device="cuda").to( memory_format=torch.channels_last ) out = torch.convolution( input, weight, None, (1, 1), (1, 1), (1, 1), False, (0, 0), 4 ) input = torch.randn((16, 8, 8, 8), dtype=torch.float16, device="cuda").to( memory_format=torch.channels_last ) out_transpose = torch.convolution( input, weight, None, (1, 1), (1, 1), (1, 1), True, (0, 0), 4 ) @unittest.expectedFailure @unittest.skipIf(not TEST_CUDA, "CUDA unavailable") @unittest.skipIf(not TEST_CUDNN, "needs cudnn") def test_conv_cudnn_memory_layout_dominance(self): # desired behavior here is to have the memory_layout of conv.weight to # dominante the layout of output. # which is not the same as current behavior, we'll fix this in # following up PRs and remove the `expectedFailure` tag input = torch.randint( 1, 10, (2, 8, 4, 4), dtype=torch.float32, device="cuda", requires_grad=True ) conv = nn.Conv2d(8, 4, 3).cuda().float() out = conv(input) self.assertTrue(out.is_contiguous()) input = input.contiguous(memory_format=torch.channels_last) out = conv(input) self.assertTrue(out.is_contiguous()) conv.weight.data = conv.weight.contiguous(memory_format=torch.channels_last) out = conv(input) self.assertTrue(out.is_contiguous(memory_format=torch.channels_last)) input = input.contiguous() out = conv(input) self.assertTrue(out.is_contiguous(memory_format=torch.channels_last)) @unittest.skipIf(not TEST_CUDA, "CUDA unavailable") def test_cudnn_noncontiguous_weight(self): # Noncontiguous weights must be contiguous() before being # passed to cuDNN input = torch.tensor([1, 1, 1], dtype=torch.double, device="cuda").view(1, 1, 3) weights1 = torch.tensor([1], dtype=torch.double, device="cuda").expand(1, 1, 2) weights2 = ( torch.tensor([1], dtype=torch.double, device="cuda") .expand(1, 1, 2) .contiguous() ) self.assertEqual( F.conv1d(input, weights1, bias=None, stride=2, dilation=2), F.conv1d(input, weights2, bias=None, stride=2, dilation=2), ) def run_grad_conv_test(self, func_forward, func_backward, dim=1, gradient="input"): for kern, inp_size in [(3, 6), (3, 7), (4, 9)]: for batch, stride, padding, chan_in, chan_out, dilation in product( [1, 2], [1, 2], [0, 1, 2], [2], [3], [1] ): for has_bias in [True, False]: input_shape = [batch, chan_in] weight_shape = [chan_out, chan_in] for _ in range(dim): input_shape.append(inp_size) weight_shape.append(kern) input = torch.randn(input_shape, requires_grad=True) weight = torch.randn(weight_shape, requires_grad=True) if has_bias: bias = torch.randn([chan_out], requires_grad=True) output = func_forward( input, weight, stride=stride, padding=padding, dilation=dilation, bias=bias, ) gradient_o = torch.randn(output.shape) gradient_w = torch.autograd.grad( output, input if (gradient == "input") else weight, gradient_o ) self.assertEqual( gradient_w[0], func_backward( input_shape if (gradient == "input") else input, weight_shape if (gradient == "weight") else weight, gradient_o, stride=stride, padding=padding, dilation=dilation, ), ) def test_grad_conv1d_input(self): self.run_grad_conv_test(F.conv1d, F.grad.conv1d_input, 1, "input") def test_grad_conv1d_weight(self): self.run_grad_conv_test(F.conv1d, F.grad.conv1d_weight, 1, "weight") def test_grad_conv2d_input(self): self.run_grad_conv_test(F.conv2d, F.grad.conv2d_input, 2, "input") def test_grad_conv2d_weight(self): self.run_grad_conv_test(F.conv2d, F.grad.conv2d_weight, 2, "weight") def test_grad_conv3d_input(self): self.run_grad_conv_test(F.conv3d, F.grad.conv3d_input, 3, "input") def test_grad_conv3d_weight(self): self.run_grad_conv_test(F.conv3d, F.grad.conv3d_weight, 3, "weight") @unittest.skipIf(not torch._nnpack_available(), "NNPACK unavailable") def test_nnpack_conv(self): for kern, inp_size in [(3, 6), (3, 7), (4, 9)]: for batch, stride, padding, chan_in, chan_out in product( [1, 2, 3, 4], [1, 2], [0, 1, 2], [2], [3] ): for has_bias in [True, False]: input_shape = [batch, chan_in] weight_shape = [chan_out, chan_in] for _ in range(2): input_shape.append(inp_size) weight_shape.append(kern) input = torch.randn( input_shape, requires_grad=True, dtype=torch.float ) weight = torch.randn( weight_shape, requires_grad=True, dtype=torch.float ) if has_bias: bias = torch.randn( [chan_out], requires_grad=True, dtype=torch.float ) output = torch._nnpack_spatial_convolution( input, weight, stride=stride, padding=padding, bias=bias ) output_expected = torch.nn.functional.conv2d( input, weight, stride=stride, padding=padding, bias=bias ) self.assertEqual(output, output_expected, atol=3e-4, rtol=0) gradient_o = torch.randn(output.shape, dtype=torch.float) grads = torch.autograd.grad(output, [input, weight], gradient_o) grads_expected = torch.autograd.grad( output_expected, [input, weight], gradient_o ) for gr, gr_expected in zip(grads, grads_expected): self.assertEqual(gr, gr_expected, atol=3e-4, rtol=0) def test_conv_padding_mode(self): with self.assertRaisesRegex(ValueError, "padding_mode must be one of"): nn.Conv2d(3, 3, 3, padding_mode="xyz") with self.assertRaisesRegex(ValueError, "padding_mode must be one of"): nn.Conv2d(3, 3, 3, padding_mode=3) with self.assertRaisesRegex(ValueError, 'Only "zeros" '): nn.ConvTranspose2d(3, 3, 3, padding_mode="reflect") def test_functional_grad_conv(self): # Conv 1D input = torch.randn(1, 1, 5, requires_grad=True) weight = torch.randn(1, 1, 3, requires_grad=True) output = F.conv1d(input, weight, dilation=2) grad_output = torch.randn(output.shape) grad_input_autograd, grad_weight_autograd = torch.autograd.grad( output, (input, weight), grad_output ) grad_input_functional = torch.nn.grad.conv1d_input( input.shape, weight, grad_output, dilation=2 ) self.assertEqual(grad_input_functional, grad_input_autograd) grad_weight_functional = torch.nn.grad.conv1d_weight( input, weight.shape, grad_output, dilation=2 ) self.assertEqual(grad_weight_functional, grad_weight_autograd) # Conv 2D input = torch.randn(1, 1, 5, 5, requires_grad=True) weight = torch.randn(1, 1, 3, 3, requires_grad=True) output = F.conv2d(input, weight, dilation=2) grad_output = torch.randn(output.shape) (grad_input_autograd, grad_weight_autograd) = torch.autograd.grad( output, (input, weight), grad_output ) grad_input_functional = torch.nn.grad.conv2d_input( input.shape, weight, grad_output, dilation=2 ) self.assertEqual(grad_input_functional, grad_input_autograd) grad_weight_functional = torch.nn.grad.conv2d_weight( input, weight.shape, grad_output, dilation=2 ) self.assertEqual(grad_weight_functional, grad_weight_autograd) # Conv 3D input = torch.randn(1, 1, 5, 5, 5, requires_grad=True) weight = torch.randn(1, 1, 3, 3, 3, requires_grad=True) output = F.conv3d(input, weight, dilation=2) grad_output = torch.randn(output.shape) (grad_input_autograd, grad_weight_autograd) = torch.autograd.grad( output, (input, weight), grad_output ) grad_input_functional = torch.nn.grad.conv3d_input( input.shape, weight, grad_output, dilation=2 ) self.assertEqual(grad_input_functional, grad_input_autograd) grad_weight_functional = torch.nn.grad.conv3d_weight( input, weight.shape, grad_output, dilation=2 ) self.assertEqual(grad_weight_functional, grad_weight_autograd) def test_functional_grad_conv2d(self): BATCH_SIZE = 4 IN_CH = 8 OUT_CH = 16 SPATIAL = 32 def _test_conv2d(stride, kernel_size, groups, dilation): padding = kernel_size // 2 input = ( torch.empty(BATCH_SIZE, IN_CH, SPATIAL, SPATIAL) .uniform_(-8.0, 8.0) .requires_grad_(True) ) weight = ( torch.empty(OUT_CH, IN_CH // groups, kernel_size, kernel_size) .uniform_(-4.0, 4.0) .requires_grad_(True) ) output = F.conv2d( input, weight, stride=stride, padding=padding, dilation=dilation, groups=groups, ) grad_output = torch.randn(output.shape) (grad_input_autograd, grad_weight_autograd) = torch.autograd.grad( output, (input, weight), grad_output ) grad_input_functional = torch.nn.grad.conv2d_input( input.shape, weight, grad_output, stride=stride, padding=padding, dilation=dilation, groups=groups, ) self.assertEqual(grad_input_functional, grad_input_autograd) grad_weight_functional = torch.nn.grad.conv2d_weight( input, weight.shape, grad_output, stride=stride, padding=padding, dilation=dilation, groups=groups, ) self.assertEqual(grad_weight_functional, grad_weight_autograd) strides = [1, 2] kernel_sizes = [1, 3, 5] groups = [1, 2, 4] dilates = [1, 2] for s, k, g, d in product(strides, kernel_sizes, groups, dilates): _test_conv2d(s, k, g, d) def test_permute_conv2d_issue_120211(self): def reproducer(radius: int): image = torch.rand(1, 1024, 1024, 3) image = image.permute(0, 3, 1, 2) kernel_x = torch.zeros([3, 1, 1, radius * 2 + 1], device=image.device) image = torch.nn.functional.conv2d(image, kernel_x, groups=image.shape[-3]) for i in range(0, 128): # This should not fail reproducer(radius=i) def test_conv3d_issue_120406(self): # This should not fail F.conv3d(torch.ones(2, 3, 8, 9, 26), torch.ones(3, 1, 1, 1, 17), groups=3) def test_conv1d_issue_120547(self): weight = torch.ones([16, 1, 32]) bias = torch.ones([16]) stride, padding, dilation, groups = (1, 16, 1, 16) input = torch.rand((1, 1, 16)) input = input.transpose(1, 2) # This should not fail F.conv1d(input, weight, bias, stride, padding, dilation, groups) class TestConvolutionNNDeviceType(NNTestCase): def run_conv_double_back_test( self, kern, stride, padding, chan_in, chan_out, batch_size, inp_size, dilation, no_weight, groups=1, use_cuda=False, use_bias=True, dtype=torch.double, ): if use_cuda: device = torch.device("cuda") else: device = torch.device("cpu") x = torch.randn( batch_size, chan_in, inp_size, inp_size, device=device, dtype=dtype, requires_grad=True, ) weight = torch.randn( chan_out, chan_in // groups, kern, kern, device=device, dtype=dtype, requires_grad=not no_weight, ) if use_bias: bias = torch.randn(chan_out, device=device, dtype=dtype, requires_grad=True) else: bias = None def func(*inputs): if use_bias: lx, lweight, lbias = inputs else: lx, lweight = inputs lbias = None # We disable cudnn during forward to avoid finite difference imprecision issues with cudnn.flags(enabled=False): out = F.conv2d(lx, lweight, lbias, stride, padding, dilation, groups) return out if use_bias: inputs = x, weight, bias else: inputs = x, weight dummy_out = func(*inputs) grad_y = torch.randn_like( dummy_out, device=device, dtype=dtype, requires_grad=True ) # Issue #15353: test mkldnn double backward, don't run gradgradcheck due # to imprecision issues if dtype == torch.float: (g,) = torch.autograd.grad(dummy_out.sum(), x, create_graph=True) return g.requires_grad return gradgradcheck(func, inputs, (grad_y,)) @onlyCUDA @skipCUDAIfNoCudnn @dtypes( *floating_and_complex_types_and( torch.half, *[torch.bfloat16] if AMPERE_OR_ROCM else [] ) ) def test_Conv2d_deterministic_cudnn(self, device, dtype): inputs = torch.randn(2, 3, 5, 5, device=device, dtype=dtype, requires_grad=True) with cudnn.flags(enabled=True, benchmark=True, deterministic=True): conv1 = torch.nn.Conv2d(3, 3, 3).to(device, dtype) conv2 = torch.nn.Conv2d(3, 3, 3).to(device, dtype) conv2.bias.data.copy_(conv1.bias.data) conv2.weight.data.copy_(conv1.weight.data) out1 = conv1(inputs) out2 = conv2(inputs) self.assertEqual(out1, out2, atol=0.0, rtol=0) y = torch.randn(out1.size(), device=device, dtype=dtype) out1.backward(y) out2.backward(y) self.assertEqual( conv1.bias.grad.data, conv2.bias.grad.data, atol=0.0, rtol=0 ) self.assertEqual( conv1.weight.grad.data, conv2.weight.grad.data, atol=0.0, rtol=0 ) @onlyCUDA @dtypes( *floating_types_and(torch.half, *[torch.bfloat16] if AMPERE_OR_ROCM else []) ) def test_Conv2d_large_workspace(self, device, dtype): # These sizes require huge cuDNN workspaces. Make sure we choose a # reasonable algorithm that does not run out of memory sizes = [ (1, 256, 109, 175), (1, 256, 80, 128), (1, 256, 120, 192), ] def run_test(benchmark): with torch.backends.cudnn.flags(enabled=True, benchmark=benchmark): conv = torch.nn.Conv2d(256, 256, kernel_size=3, padding=1).to( device, dtype ) for size in sizes: x = torch.randn(size, device=device, dtype=dtype) out = conv(x.detach().clone().requires_grad_()) out.backward(torch.ones_like(out)) run_test(benchmark=False) run_test(benchmark=True) @onlyCUDA @dtypes(torch.half, torch.float) def test_ConvTranspose2d_large_output_padding(self, device, dtype): net1 = torch.nn.ConvTranspose2d( 128, 64, kernel_size=3, stride=2, padding=1, output_padding=1 ).to(device=device, dtype=dtype) net2 = torch.nn.ConvTranspose2d( 64, 32, kernel_size=3, stride=2, padding=1, output_padding=1 ).to(device=device, dtype=dtype) net3 = torch.nn.ConvTranspose2d( 32, 3, kernel_size=3, stride=2, padding=1, output_padding=1 ).to(device=device, dtype=dtype) x = torch.rand(1, 128, 6, 6, device=device, dtype=dtype, requires_grad=True) x = net1(x) x = net2(x) x = net3(x) x.backward(torch.randn_like(x)) torch.cuda.synchronize() @onlyCUDA @dtypes(torch.float, torch.double, torch.half) # Very similar to test_Conv2d_naive_groups but with special care to handle # the number of groups == number of input channels @torch.backends.cudnn.flags(enabled=True, benchmark=False) @tf32_on_and_off(0.01) def test_Conv2d_depthwise_naive_groups(self, device, dtype): for depth_multiplier in [1, 2]: m = nn.Conv2d(2, 2 * depth_multiplier, kernel_size=3, groups=2).to( device, dtype ) i = ( torch.randn(2, 2, 6, 6, device="cuda", dtype=dtype) .div_(2) .requires_grad_() ) output = m(i) grad_output = ( torch.randn(2, 2 * depth_multiplier, 4, 4, device=device, dtype=dtype) / 2 ) output.backward(grad_output) offset = 1 * depth_multiplier m1 = nn.Conv2d(1, 1 * depth_multiplier, kernel_size=3).to(device, dtype) m1.weight.data = m.weight.data[:offset].clone() m1.bias.data = m.bias.data[:offset].clone() i1 = i.detach()[:, :1].clone().requires_grad_() output1 = m1(i1) output1.backward(grad_output[:, :offset].contiguous()) m2 = nn.Conv2d(1, 1 * depth_multiplier, kernel_size=3).to(device, dtype) m2.weight.data.copy_(m.weight.data[offset:]) m2.bias.data.copy_(m.bias.data[offset:]) i2 = i.detach()[:, 1:].clone().requires_grad_() output2 = m2(i2) output2.backward(grad_output[:, offset:].contiguous()) self.assertEqual( output, torch.cat([output1, output2], 1), atol=dtype2prec_DONTUSE[dtype], rtol=0, ) self.assertEqual( i.grad.data, torch.cat([i1.grad.data, i2.grad.data], 1), atol=dtype2prec_DONTUSE[dtype], rtol=0, ) self.assertEqual( m.bias.grad.data, torch.cat([m1.bias.grad.data, m2.bias.grad.data], 0), atol=dtype2prec_DONTUSE[dtype], rtol=0, ) self.assertEqual( m.weight.grad.data, torch.cat([m1.weight.grad.data, m2.weight.grad.data], 0), atol=dtype2prec_DONTUSE[dtype], rtol=0, ) @onlyCUDA @dtypes(torch.float, torch.double, torch.half) @torch.backends.cudnn.flags(enabled=True, benchmark=False) @tf32_on_and_off(0.01) def test_Conv3d_depthwise_naive_groups(self, device, dtype): for depth_multiplier in [1, 2]: m = nn.Conv3d(2, 2 * depth_multiplier, kernel_size=3, groups=2).to( device, dtype ) i = ( torch.randn(2, 2, 6, 6, 6, device="cuda", dtype=dtype) .div_(2) .requires_grad_() ) output = m(i) grad_output = ( torch.randn( 2, 2 * depth_multiplier, 4, 4, 4, device=device, dtype=dtype ) / 2 ) output.backward(grad_output) offset = 1 * depth_multiplier m1 = nn.Conv3d(1, 1 * depth_multiplier, kernel_size=3).to(device, dtype) m1.weight.data = m.weight.data[:offset].clone() m1.bias.data = m.bias.data[:offset].clone() i1 = i.detach()[:, :1].clone().requires_grad_() output1 = m1(i1) output1.backward(grad_output[:, :offset].contiguous()) m2 = nn.Conv3d(1, 1 * depth_multiplier, kernel_size=3).to(device, dtype) m2.weight.data.copy_(m.weight.data[offset:]) m2.bias.data.copy_(m.bias.data[offset:]) i2 = i.detach()[:, 1:].clone().requires_grad_() output2 = m2(i2) output2.backward(grad_output[:, offset:].contiguous()) is_cuda_sm86 = device.startswith( "cuda" ) and torch.cuda.get_device_capability(0) == (8, 6) atol, rtol = ( (3e-4, 3e-2) if dtype == torch.float32 and is_cuda_sm86 else (dtype2prec_DONTUSE[dtype], 0) ) self.assertEqual( output, torch.cat([output1, output2], 1), atol=atol, rtol=rtol ) self.assertEqual( i.grad.data, torch.cat([i1.grad.data, i2.grad.data], 1), atol=dtype2prec_DONTUSE[dtype], rtol=0, ) self.assertEqual( m.bias.grad.data, torch.cat([m1.bias.grad.data, m2.bias.grad.data], 0), atol=dtype2prec_DONTUSE[dtype], rtol=0, ) self.assertEqual( m.weight.grad.data, torch.cat([m1.weight.grad.data, m2.weight.grad.data], 0), atol=atol, rtol=rtol, ) @onlyCUDA @dtypes( *floating_types_and(torch.half, *[torch.bfloat16] if AMPERE_OR_ROCM else []) ) def test_noncontig_conv_grad(self, device, dtype): # FIXME: remove after adding non-contiguous grad tests for all modules module = nn.Conv2d(3, 5, kernel_size=3, padding=1).to(device, dtype) input = torch.randn( 2, 3, 10, 10, dtype=dtype, device=device, requires_grad=True ) output = module(input) grad = torch.randn(2, 2, 5, 10, 10, dtype=dtype, device=device)[:, 1] assert not grad.is_contiguous() output.backward(grad, retain_graph=True) self.assertIsNotNone(input.grad) result = input.grad.data.clone() input.grad.data.zero_() output.backward(grad.contiguous()) self.assertEqual( result, input.grad.data, atol=dtype2prec_DONTUSE[dtype], rtol=0 ) @onlyCUDA @dtypes(torch.double) def test_conv_double_backward(self, device, dtype): with torch.backends.cudnn.flags(enabled=True, deterministic=True): # Double backward only runs with DoubleTensor due to precision reason batch_size = 1 for kern, inp_size, dilations in [(3, 5, [1, 2]), (4, 9, [1])]: for stride, padding, chan_in, chan_out, dilation in product( [1], [2], [2], [3], dilations ): no_weight = stride == 2 result = self.run_conv_double_back_test( kern, stride, padding, chan_in, chan_out, batch_size, inp_size, dilation, no_weight, use_cuda=True, dtype=dtype, ) self.assertTrue( result, "Conv double backward test failed with parameters:" + "\nkern: " + str(kern) + "\nstride: " + str(stride) + "\npadding: " + str(padding) + "\nchan_in: " + str(chan_in) + "\nchan_out: " + str(chan_out) + "\nbatch_size: " + str(batch_size) + "\ninp_size: " + str(inp_size) + "\ndilation: " + str(dilation), ) def test_conv_double_backward_no_bias(self): kern = 3 stride = 2 chan_in, chan_out = 2, 4 batch_size = 2 inp_size = 5 padding = 1 dilation = 1 no_weight = False use_bias = True result = self.run_conv_double_back_test( kern, stride, padding, chan_in, chan_out, batch_size, inp_size, dilation, no_weight, use_bias=use_bias, ) self.assertTrue( result, "Conv double backward test failed with parameters:" + "\nkern: " + str(kern) + "\nstride: " + str(stride) + "\npadding: " + str(padding) + "\nchan_in: " + str(chan_in) + "\nchan_out: " + str(chan_out) + "\nbatch_size: " + str(batch_size) + "\ninp_size: " + str(inp_size) + "\ndilation: " + str(dilation), ) def test_conv_double_backward_groups(self): kern = 3 stride = 1 padding = 2 chan_in, chan_out = 2, 4 batch_size = 2 inp_size = 6 dilation = 1 no_weight = False groups = 2 result = self.run_conv_double_back_test( kern, stride, padding, chan_in * groups, chan_out * groups, batch_size, inp_size, dilation, no_weight, groups=groups, ) self.assertTrue( result, "Conv double backward test failed with parameters:" + "\nkern: " + str(kern) + "\nstride: " + str(stride) + "\npadding: " + str(padding) + "\nchan_in: " + str(chan_in) + "\nchan_out: " + str(chan_out) + "\nbatch_size: " + str(batch_size) + "\ninp_size: " + str(inp_size) + "\ndilation: " + str(dilation) + "\ngroups: " + str(groups), ) def test_conv_double_backward_stride(self): batch_size = 2 # Cannot provide ggW when stride is > 1 for kern, inp_size, dilations in [(3, 5, [1, 2]), (3, 7, [1])]: for stride, padding, chan_in, chan_out, dilation in product( [2], [0, 1], [1], [2], dilations ): no_weight = False self.run_conv_double_back_test( kern, stride, padding, chan_in, chan_out, batch_size, inp_size, dilation, no_weight, ) @dtypes(torch.float, torch.cfloat) @torch.backends.cudnn.flags(enabled=True, benchmark=False) def test_conv1d_same_padding(self, device, dtype): # Test padding='same' outputs the correct shape test_args = [ # in_size range(50, 55), # kernel_size [1, 2, 3, 8], # dilation range(1, 4), # stride [1], ] for in_size, k_size, dilation, stride in itertools.product(*test_args): x = torch.rand(1, 1, in_size, device=device, dtype=dtype) y = torch.rand(1, 1, k_size, device=device, dtype=dtype) z = F.conv1d(x, y, padding="same", dilation=dilation, stride=stride) self.assertEqual(z.size(2), int(math.ceil(in_size / stride))) # Compare F.conv1d padding='same' output against manual padding # Without strides/dilation x = torch.rand(1, 1, 12, device=device, dtype=dtype) y = torch.rand(1, 1, 3, device=device, dtype=dtype) expect = F.conv1d(x, y, padding=1) actual = F.conv1d(x, y, padding="same") self.assertEqual(expect, actual) # With dilation x = torch.rand(1, 1, 12, device=device, dtype=dtype) y = torch.rand(1, 1, 4, device=device, dtype=dtype) expect = F.conv1d(x, y, padding=3, dilation=2) actual = F.conv1d(x, y, padding="same", dilation=2) self.assertEqual(expect, actual) # Dilation with asymmetric padding expect = F.conv1d(x, y, padding=5, dilation=3)[..., 1:] actual = F.conv1d(x, y, padding="same", dilation=3) self.assertEqual(expect, actual) @dtypes(torch.float, torch.cfloat) def test_conv2d_same_padding(self, device, dtype): if dtype is torch.cfloat: rtol, atol = 2e-6, 2e-6 else: rtol, atol = None, None # Compare F.conv2d padding='same' output against manual padding # Without strides/dilation x = torch.rand(1, 1, 10, 11, device=device, dtype=dtype) y = torch.rand(1, 1, 4, 5, device=device, dtype=dtype) expect = F.conv2d(x, y, padding=(2, 2))[..., 1:, :] actual = F.conv2d(x, y, padding="same") self.assertEqual(expect, actual, rtol=rtol, atol=atol) # With dilation y = torch.rand(1, 1, 3, 4, device=device, dtype=dtype) expect = F.conv2d(x, y, padding=(2, 3), dilation=2) actual = F.conv2d(x, y, padding="same", dilation=2) self.assertEqual(expect, actual, rtol=rtol, atol=atol) # Dilation with asymmetric padding y = torch.rand(1, 1, 4, 4, device=device, dtype=dtype) expect = F.conv2d(x, y, padding=5, dilation=3)[..., 1:, 1:] actual = F.conv2d(x, y, padding="same", dilation=3) self.assertEqual(expect, actual, rtol=rtol, atol=atol) @dtypes(torch.float, torch.cfloat) def test_conv3d_same_padding(self, device, dtype): if dtype is torch.cfloat: rtol, atol = 2e-6, 2e-6 else: rtol, atol = None, None # Compare F.conv3d padding='same' output against manual padding # Without strides/dilation x = torch.rand(1, 1, 10, 11, 12, device=device, dtype=dtype) y = torch.rand(1, 1, 1, 2, 5, device=device, dtype=dtype) expect = F.conv3d(x, y, padding=(0, 1, 2))[..., :, 1:, :] actual = F.conv3d(x, y, padding="same") self.assertEqual(expect, actual, rtol=rtol, atol=atol) # With dilation expect = F.conv3d(x, y, padding=(0, 1, 4), dilation=2) actual = F.conv3d(x, y, padding="same", dilation=2) self.assertEqual(expect, actual, rtol=rtol, atol=atol) # Dilation with asymmetric padding y = torch.rand(1, 1, 4, 4, 4, device=device, dtype=dtype) expect = F.conv3d(x, y, padding=5, dilation=3)[..., 1:, 1:, 1:] actual = F.conv3d(x, y, padding="same", dilation=3) self.assertEqual(expect, actual, rtol=rtol, atol=atol) @dtypes(torch.float, torch.cfloat) def test_conv1d_valid_padding(self, device, dtype): # Test F.conv1d padding='valid' is the same as no padding x = torch.rand(1, 1, 10, device=device, dtype=dtype) y = torch.rand(1, 1, 4, device=device, dtype=dtype) expect = F.conv1d(x, y) actual = F.conv1d(x, y, padding="valid") self.assertEqual(expect, actual) @dtypes(torch.float, torch.cfloat) def test_conv2d_valid_padding(self, device, dtype): # Test F.conv2d padding='valid' is the same as no padding x = torch.rand(1, 1, 1, 10, device=device, dtype=dtype) y = torch.rand(1, 1, 1, 4, device=device, dtype=dtype) expect = F.conv2d(x, y) actual = F.conv2d(x, y, padding="valid") self.assertEqual(expect, actual) @dtypes(torch.float, torch.cfloat) def test_conv3d_valid_padding(self, device, dtype): # Test F.conv3d padding='valid' is the same as no padding x = torch.rand(1, 1, 1, 1, 10, dtype=dtype, device=device) y = torch.rand(1, 1, 1, 1, 4, dtype=dtype, device=device) expect = F.conv3d(x, y) actual = F.conv3d(x, y, padding="valid") self.assertEqual(expect, actual) @dtypes(torch.float, torch.cfloat) def test_conv1d_same_padding_backward(self, device, dtype): # Test F.conv1d gradients work with padding='same' x = torch.rand(1, 1, 12, dtype=dtype, device=device, requires_grad=True) y = torch.rand(1, 1, 4, dtype=dtype, device=device, requires_grad=True) # Symmetric padding z = F.conv1d(x, y, padding=3, dilation=2) z.sum().abs().backward() gx_expect, gy_expect = x.grad, y.grad x.grad, y.grad = None, None z = F.conv1d(x, y, padding="same", dilation=2) z.sum().abs().backward() self.assertEqual(gx_expect, x.grad) self.assertEqual(gy_expect, y.grad) x.grad, y.grad = None, None # Asymmetric padding z = F.conv1d(x, y, padding=2)[..., 1:] z.sum().abs().backward() gx_expect, gy_expect = x.grad, y.grad x.grad, y.grad = None, None z = F.conv1d(x, y, padding="same") z.sum().abs().backward() self.assertEqual(gx_expect, x.grad) self.assertEqual(gy_expect, y.grad) @dtypes(torch.float, torch.cfloat) @tf32_on_and_off(0.001) def test_conv2d_same_padding_backward(self, device, dtype): # Test F.conv2d gradients work with padding='same' x = torch.rand(1, 1, 10, 11, device=device, dtype=dtype, requires_grad=True) y = torch.rand(1, 1, 4, 5, device=device, dtype=dtype, requires_grad=True) # Symmetric padding z = F.conv2d(x, y, padding=(3, 4), dilation=2) z.sum().abs().backward() gx_expect, gy_expect = x.grad, y.grad x.grad, y.grad = None, None z = F.conv2d(x, y, padding="same", dilation=2) z.sum().abs().backward() self.assertEqual(gx_expect, x.grad) self.assertEqual(gy_expect, y.grad) x.grad, y.grad = None, None # Asymmetric padding y = torch.rand(1, 1, 4, 4, device=device, dtype=dtype, requires_grad=True) z = F.conv2d(x, y, padding=2)[..., 1:, 1:] z.sum().abs().backward() gx_expect, gy_expect = x.grad, y.grad x.grad, y.grad = None, None z = F.conv2d(x, y, padding="same") z.sum().abs().backward() self.assertEqual(gx_expect, x.grad) self.assertEqual(gy_expect, y.grad) @dtypes(torch.double, torch.cdouble) def test_conv3d_same_padding_backward(self, device, dtype): check_forward_ad = torch.device(device).type != "xla" # Test F.conv3d gradients work with padding='same' x = torch.rand(1, 1, 1, 11, 12, dtype=dtype, device=device, requires_grad=True) y = torch.rand(1, 1, 1, 2, 5, dtype=dtype, device=device, requires_grad=True) # Symmetric padding z = F.conv3d(x, y, padding=(0, 1, 4), dilation=2) z.sum().abs().backward() gx_expect, gy_expect = x.grad, y.grad x.grad, y.grad = None, None z = F.conv3d(x, y, padding="same", dilation=2) z.sum().abs().backward() self.assertEqual(gx_expect, x.grad) self.assertEqual(gy_expect, y.grad) x.grad, y.grad = None, None gradcheck( lambda x, y: F.conv3d(x, y, padding="same", dilation=2), (x, y), check_forward_ad=check_forward_ad, nondet_tol=1e-5, ) if torch.device(device).type != "cuda": # https://github.com/pytorch/pytorch/issues/70702 gradgradcheck( lambda x, y: F.conv3d(x, y, padding="same", dilation=2), (x, y), check_fwd_over_rev=True, ) # Asymmetric padding y = torch.rand(1, 1, 1, 4, 4, dtype=dtype, device=device, requires_grad=True) z = F.conv3d(x, y, padding=2)[..., 1:, 1:] z.sum().abs().backward() gx_expect, gy_expect = x.grad, y.grad x.grad, y.grad = None, None z = F.conv3d(x, y, padding="same") z.sum().abs().backward() self.assertEqual(gx_expect, x.grad) self.assertEqual(gy_expect, y.grad) gradcheck( lambda x, y: F.conv3d(x, y, padding="same"), (x, y), check_forward_ad=check_forward_ad, nondet_tol=1e-5, ) if torch.device(device).type != "cuda": # https://github.com/pytorch/pytorch/issues/70702 gradgradcheck( lambda x, y: F.conv3d(x, y, padding="same"), (x, y), check_fwd_over_rev=True, ) @dtypes(torch.float, torch.cfloat) def test_conv1d_valid_padding_backward(self, device, dtype): # Test F.conv1d gradients work with padding='valid' x = torch.rand(1, 1, 10, dtype=dtype, device=device, requires_grad=True) y = torch.rand(1, 1, 4, dtype=dtype, device=device, requires_grad=True) F.conv1d(x, y, padding=0).sum().abs().backward() gx_expect, gy_expect = x.grad, y.grad x.grad, y.grad = None, None F.conv1d(x, y, padding="valid").sum().abs().backward() gx_actual, gy_actual = x.grad, y.grad self.assertEqual(gx_expect, gx_actual) self.assertEqual(gy_expect, gy_actual) @unittest.skipIf(not TEST_SCIPY, "Scipy required for the test.") @dtypes(torch.float, torch.cfloat) @parametrize_test("mode", ("valid", "same")) def test_conv1d_vs_scipy(self, device, dtype, mode): t = make_tensor((1, 10), device=device, dtype=dtype) feat_dim = t.shape[1] weight_even = make_tensor((1, 1, 4), device=device, dtype=dtype) weight_odd = make_tensor((1, 1, 5), device=device, dtype=dtype) def _test(t, weight, mode): # SciPy expects two 1-D inputs. t_a = t.view(-1).cpu().numpy() w_a = weight.view(-1).cpu().numpy() expected = scipy.signal.convolve(t_a, w_a, mode=mode) kwargs = {"padding": mode} if mode == "same": # `same` padding in PyTorch conv1d is different # from SciPy p = weight.shape[2] // 2 t = torch.nn.functional.pad(t, (p, p)) # We have already taken care of padding kwargs.pop("padding") # second input is flipped in SciPy's convolve weight_flipped = torch.flip(weight, (2,)) actual = torch.nn.functional.conv1d(t, weight_flipped, **kwargs).squeeze(0) if mode == "same": actual = actual[:feat_dim] self.assertEqual(actual, expected, atol=2e-5, rtol=2e-5) # Global dtype for this test suite is torch.double # This leads to change in type-promotion # and conv1d outputs `complex128` for `complex64` input. with set_default_dtype(torch.float): _test(t, weight_even, mode) _test(t, weight_odd, mode) @unittest.skipIf(not TEST_SCIPY, "Scipy required for the test.") @dtypes(torch.float, torch.cfloat) @parametrize_test("mode", ("valid", "same")) def test_conv2d_vs_scipy(self, device, dtype, mode): t = make_tensor((1, 5, 10), device=device, dtype=dtype) weight_even = make_tensor((1, 1, 2, 4), device=device, dtype=dtype) weight_odd = make_tensor((1, 1, 3, 5), device=device, dtype=dtype) def _test(t, weight, mode): # SciPy expects two 2-D inputs. t_a = t.squeeze(0).cpu().numpy() w_a = weight.squeeze(0).squeeze(0).cpu().numpy() expected = scipy.signal.convolve2d(t_a, w_a, mode=mode) kwargs = {"padding": mode} if mode == "same": # `same` padding in PyTorch conv2d is different # from SciPy left_right_pad = weight.shape[3] // 2 top_bottom_pad = weight.shape[2] // 2 p = (left_right_pad, left_right_pad, top_bottom_pad, top_bottom_pad) t = torch.nn.functional.pad(t, p) # We have already taken care of padding kwargs.pop("padding") # second input is flipped in SciPy's convolve2d weight_flipped = torch.flip(weight, (2, 3)) actual = torch.nn.functional.conv2d(t, weight_flipped, **kwargs).squeeze(0) if mode == "same": actual = actual[:5, :10] self.assertEqual(actual, expected, rtol=2e-5, atol=5e-6) # Global dtype for this test suite is torch.double # This leads to change in type-promotion # and conv1d outputs `complex128` for `complex64` input. with set_default_dtype(torch.float): _test(t, weight_even, mode) _test(t, weight_odd, mode) @unittest.skipIf(not TEST_SCIPY, "Scipy required for the test.") @dtypes(torch.float, torch.cfloat) @parametrize_test("mode", ("valid", "same")) def test_conv3d_vs_scipy(self, device, dtype, mode): t = make_tensor((1, 5, 5, 10), device=device, dtype=dtype) weight_even = make_tensor((1, 1, 2, 2, 4), device=device, dtype=dtype) weight_odd = make_tensor((1, 1, 2, 3, 5), device=device, dtype=dtype) def _test(t, weight, mode): # SciPy expects two 3-D inputs. t_a = t.squeeze(0).cpu().numpy() w_a = weight.squeeze(0).squeeze(0).cpu().numpy() expected = scipy.signal.convolve(t_a, w_a, mode=mode) kwargs = {"padding": mode} if mode == "same": # `same` padding in PyTorch conv3d is different # from SciPy left_right_pad = weight.shape[4] // 2 top_bottom_pad = weight.shape[3] // 2 front_back_pad = weight.shape[2] // 2 p = ( left_right_pad, left_right_pad, top_bottom_pad, top_bottom_pad, front_back_pad, front_back_pad, ) t = torch.nn.functional.pad(t, p) # We have already taken care of padding kwargs.pop("padding") # second input is flipped in SciPy's convolve weight_flipped = torch.flip(weight, (2, 3, 4)) actual = torch.nn.functional.conv3d(t, weight_flipped, **kwargs).squeeze(0) if mode == "same": actual = actual[:5, :5, :10] if tf32_is_not_fp32() and ( dtype == torch.float or dtype == torch.complex64 ): self.assertEqual(actual, expected, atol=0.05, rtol=0.05) else: self.assertEqual(actual, expected, rtol=2e-5, atol=5e-6) # Global dtype for this test suite is torch.double # This leads to change in type-promotion # and conv1d outputs `complex128` for `complex64` input. with set_default_dtype(torch.float): _test(t, weight_even, mode) _test(t, weight_odd, mode) @dtypes(torch.float, torch.complex64) def test_conv2d_valid_padding_backward(self, device, dtype): # Test F.conv2d gradients work with padding='valid' x = torch.rand(1, 1, 1, 10, device=device, dtype=dtype, requires_grad=True) y = torch.rand(1, 1, 1, 4, device=device, dtype=dtype, requires_grad=True) F.conv2d(x, y, padding=0).sum().abs().backward() gx_expect, gy_expect = x.grad, y.grad x.grad, y.grad = None, None F.conv2d(x, y, padding="valid").sum().abs().backward() gx_actual, gy_actual = x.grad, y.grad self.assertEqual(gx_expect, gx_actual) self.assertEqual(gy_expect, gy_actual) @dtypes(torch.double, torch.cdouble) def test_conv3d_valid_padding_backward(self, device, dtype): check_forward_ad = torch.device(device).type != "xla" # Test F.conv3d gradients work with padding='valid' x = torch.rand(1, 1, 1, 1, 10, dtype=dtype, device=device, requires_grad=True) y = torch.rand(1, 1, 1, 1, 4, dtype=dtype, device=device, requires_grad=True) F.conv3d(x, y, padding=0).sum().abs().backward() gx_expect, gy_expect = x.grad, y.grad x.grad, y.grad = None, None F.conv3d(x, y, padding="valid").sum().abs().backward() gx_actual, gy_actual = x.grad, y.grad self.assertEqual(gx_expect, gx_actual) self.assertEqual(gy_expect, gy_actual) gradcheck( lambda x, y: F.conv3d(x, y, padding="valid"), (x, y), check_forward_ad=check_forward_ad, ) gradgradcheck( lambda x, y: F.conv3d(x, y, padding="valid"), (x, y), check_fwd_over_rev=check_forward_ad, ) @parametrize_test("N", range(2, 4), name_fn=lambda N: f"ConvTranspose{N}d") def test_conv_transpose_with_output_size_and_no_batch_dim(self, device, N): # For inputs with no batch dim, verify output is the correct shape when output_size is set. # See https://github.com/pytorch/pytorch/issues/75889 inp = torch.randn((1, 15, 13) if N == 2 else (1, 15, 13, 13), device=device) output_size = (1, 240, 200) if N == 2 else (1, 240, 200, 200) ConvTransposeNd = getattr(nn, f"ConvTranspose{N}d") m = ConvTransposeNd( 1, 1, kernel_size=16, stride=16, padding=7, bias=False, device=device ) output = m(inp, output_size=output_size) self.assertEqual(output.shape, output_size) @skipMeta @parametrize_test( "input_shape,transposed,dilated,groups,layout,backend_expected", [ # === slow === subtest( ( (2, 6, 7), False, False, 3, torch.strided, torch._C._ConvBackend.Slow2d, ), decorators=[onlyNativeDeviceTypes, disableMkldnn, disablecuDNN], name="slow1d", ), subtest( ( (2, 6, 7), True, False, 3, torch.strided, torch._C._ConvBackend.SlowTranspose2d, ), decorators=[onlyNativeDeviceTypes, disableMkldnn, disablecuDNN], name="slow1d_transposed", ), subtest( ( (2, 6, 7), False, True, 3, torch.strided, torch._C._ConvBackend.SlowDilated2d, ), decorators=[onlyNativeDeviceTypes, disableMkldnn, disablecuDNN], name="slow1d_dilated", ), subtest( ( (2, 6, 7), True, True, 3, torch.strided, torch._C._ConvBackend.SlowTranspose2d, ), decorators=[onlyNativeDeviceTypes, disableMkldnn, disablecuDNN], name="slow1d_dilated_transposed", ), subtest( ( (2, 6, 7, 8), False, False, 3, torch.strided, torch._C._ConvBackend.Slow2d, ), decorators=[onlyNativeDeviceTypes, disableMkldnn, disablecuDNN], name="slow2d", ), subtest( ( (2, 6, 7, 8), True, False, 3, torch.strided, torch._C._ConvBackend.SlowTranspose2d, ), decorators=[onlyNativeDeviceTypes, disableMkldnn, disablecuDNN], name="slow2d_transposed", ), subtest( ( (2, 6, 7, 8), False, True, 3, torch.strided, torch._C._ConvBackend.SlowDilated2d, ), decorators=[onlyNativeDeviceTypes, disableMkldnn, disablecuDNN], name="slow2d_dilated", ), subtest( ( (2, 6, 7, 8), True, True, 3, torch.strided, torch._C._ConvBackend.SlowTranspose2d, ), decorators=[onlyNativeDeviceTypes, disableMkldnn, disablecuDNN], name="slow2d_dilated_transposed", ), subtest( ( (2, 6, 7, 8, 9), False, False, 3, torch.strided, torch._C._ConvBackend.Slow3d, ), decorators=[onlyCPU, disableMkldnn], name="slow3d_cpu", ), # CUDA doesn't have a slow 3D implementation, so it goes to the dilated 3D implementation instead subtest( ( (2, 6, 7, 8, 9), False, False, 3, torch.strided, torch._C._ConvBackend.SlowDilated3d, ), decorators=[onlyCUDA, disablecuDNN], name="slow3d_cuda", ), # FIXME: RuntimeError: CUDA out of memory. # subtest(((2, 6, 7, 8, 9), True, False, 3, torch.strided, torch._C._ConvBackend.SlowTranspose3d), # decorators=[onlyNativeDeviceTypes, disableMkldnn, disablecuDNN], name='slow3d_transposed'), subtest( ( (2, 6, 7, 8, 9), False, True, 3, torch.strided, torch._C._ConvBackend.SlowDilated3d, ), decorators=[onlyNativeDeviceTypes, disableMkldnn, disablecuDNN], name="slow3d_dilated", ), # FIXME: RuntimeError: CUDA out of memory. # subtest(((2, 6, 7, 8, 9), True, True, 3, torch.strided, torch._C._ConvBackend.SlowTranspose3d), # decorators=[onlyNativeDeviceTypes, disableMkldnn, disablecuDNN], name='slow3d_dilated_transposed'), subtest( ( (0, 6, 7), False, False, 3, torch.strided, torch._C._ConvBackend.Empty, ), decorators=[onlyNativeDeviceTypes, disableMkldnn], name="empty_batch1d", ), subtest( ( (2, 0, 7), False, False, 3, torch.strided, torch._C._ConvBackend.Empty, ), decorators=[onlyNativeDeviceTypes, disableMkldnn], name="empty_channel1d", ), subtest( ( (0, 0, 7), False, False, 3, torch.strided, torch._C._ConvBackend.Empty, ), decorators=[onlyNativeDeviceTypes, disableMkldnn], name="empty_batch_channel1d", ), subtest( ( (0, 6, 7, 8), False, False, 3, torch.strided, torch._C._ConvBackend.Empty, ), decorators=[onlyNativeDeviceTypes, disableMkldnn], name="empty_batch2d", ), subtest( ( (2, 0, 7, 8), False, False, 3, torch.strided, torch._C._ConvBackend.Empty, ), decorators=[onlyNativeDeviceTypes, disableMkldnn], name="empty_channel2d", ), subtest( ( (0, 0, 7, 8), False, False, 3, torch.strided, torch._C._ConvBackend.Empty, ), decorators=[onlyNativeDeviceTypes, disableMkldnn], name="empty_batch_channel2d", ), subtest( ( (0, 6, 7, 8, 9), False, False, 3, torch.strided, torch._C._ConvBackend.Empty, ), decorators=[onlyNativeDeviceTypes, disableMkldnn], name="empty_batch3d", ), subtest( ( (2, 0, 7, 8, 9), False, False, 3, torch.strided, torch._C._ConvBackend.Empty, ), decorators=[onlyNativeDeviceTypes, disableMkldnn], name="empty_channel3d", ), subtest( ( (0, 0, 7, 8, 9), False, False, 3, torch.strided, torch._C._ConvBackend.Empty, ), decorators=[onlyNativeDeviceTypes, disableMkldnn], name="empty_batch_channel3d", ), # === cuda === # Note that disablecuDNN disables miopen as well. subtest( ( (2, 6, 7), False, False, 6, torch.strided, torch._C._ConvBackend.CudaDepthwise2d, ), decorators=[onlyCUDA, disablecuDNN], name="cuda_depthwise1d", ), subtest( ( (2, 6, 7, 8), False, False, 6, torch.strided, torch._C._ConvBackend.CudaDepthwise2d, ), decorators=[onlyCUDA, disablecuDNN], name="cuda_depthwise2d", ), subtest( ( (2, 6, 7, 8, 9), False, False, 6, torch.strided, torch._C._ConvBackend.CudaDepthwise3d, ), decorators=[onlyCUDA, disablecuDNN], name="cuda_depthwise3d", ), # === cudnn === subtest( ( (2, 6, 7), False, False, 3, torch.strided, torch._C._ConvBackend.Cudnn, ), decorators=[onlyCUDA, skipCUDAIfNoCudnn, skipCUDAIfMiopen], name="cudnn1d", ), subtest( ( (2, 6, 7, 8), False, False, 3, torch.strided, torch._C._ConvBackend.Cudnn, ), decorators=[onlyCUDA, skipCUDAIfNoCudnn, skipCUDAIfMiopen], name="cudnn2d", ), subtest( ( (2, 6, 7, 8, 9), False, False, 3, torch.strided, torch._C._ConvBackend.Cudnn, ), decorators=[onlyCUDA, skipCUDAIfNoCudnn, skipCUDAIfMiopen], name="cudnn3d", ), subtest( ( (2, 6, 7), True, False, 3, torch.strided, torch._C._ConvBackend.CudnnTranspose, ), decorators=[onlyCUDA, skipCUDAIfNoCudnn, skipCUDAIfMiopen], name="cudnn1d_transposed", ), subtest( ( (2, 6, 7, 8), True, False, 3, torch.strided, torch._C._ConvBackend.CudnnTranspose, ), decorators=[onlyCUDA, skipCUDAIfNoCudnn, skipCUDAIfMiopen], name="cudnn2d_transposed", ), # FIXME: RuntimeError: CUDA out of memory. # subtest(((2, 6, 7, 8, 9), True, False, 3, torch.strided, torch._C._ConvBackend.CudnnTranspose), # decorators=[onlyCUDA, skipCUDAIfNoCudnn, skipCUDAIfMiopen], name='cudnn3d_transposed'), # === miopen === subtest( ( (2, 6, 7), False, False, 3, torch.strided, torch._C._ConvBackend.Miopen, ), decorators=[onlyCUDA, skipCUDAIfNoMiopen], name="miopen1d", ), subtest( ( (2, 6, 7, 8), False, False, 3, torch.strided, torch._C._ConvBackend.Miopen, ), decorators=[onlyCUDA, skipCUDAIfNoMiopen], name="miopen2d", ), subtest( ( (2, 6, 7, 8, 9), False, False, 3, torch.strided, torch._C._ConvBackend.Miopen, ), decorators=[onlyCUDA, skipCUDAIfNoMiopen], name="miopen3d", ), subtest( ( (2, 6, 7), True, False, 3, torch.strided, torch._C._ConvBackend.MiopenTranspose, ), decorators=[onlyCUDA, skipCUDAIfNoMiopen], name="miopen1d_transposed", ), subtest( ( (2, 6, 7, 8), True, False, 3, torch.strided, torch._C._ConvBackend.MiopenTranspose, ), decorators=[onlyCUDA, skipCUDAIfNoMiopen], name="miopen2d_transposed", ), subtest( ( (2, 6, 7, 8, 9), True, False, 3, torch.strided, torch._C._ConvBackend.MiopenTranspose, ), decorators=[onlyCUDA, skipCUDAIfNoMiopen], name="miopen3d_transposed", ), subtest( ( (2, 6, 7), False, False, 6, torch.strided, torch._C._ConvBackend.MiopenDepthwise, ), decorators=[onlyCUDA, skipCUDAIfNoMiopen], name="miopen_depthwise1d", ), subtest( ( (2, 6, 7, 8), False, False, 6, torch.strided, torch._C._ConvBackend.MiopenDepthwise, ), decorators=[onlyCUDA, skipCUDAIfNoMiopen], name="miopen_depthwise2d", ), subtest( ( (2, 6, 7, 8, 9), False, False, 6, torch.strided, torch._C._ConvBackend.MiopenDepthwise, ), decorators=[onlyCUDA, skipCUDAIfNoMiopen], name="miopen_depthwise3d", ), # === mkldnn === subtest( ( (2, 6, 7), False, False, 3, torch._mkldnn, torch._C._ConvBackend.Mkldnn, ), decorators=[onlyCPU, skipCPUIfNoMkldnn], name="mkldnn1d", ), subtest( ( (2, 6, 7, 8), False, False, 3, torch._mkldnn, torch._C._ConvBackend.Mkldnn, ), decorators=[onlyCPU, skipCPUIfNoMkldnn], name="mkldnn2d", ), subtest( ( (2, 6, 7, 8, 9), False, False, 3, torch._mkldnn, torch._C._ConvBackend.Mkldnn, ), decorators=[onlyCPU, skipCPUIfNoMkldnn], name="mkldnn3d", ), # Transposed convolution is broken for mkldnn. See https://github.com/pytorch/pytorch/issues/68775. subtest( ( (2, 6, 7), True, False, 3, torch._mkldnn, torch._C._ConvBackend.Mkldnn, ), decorators=[onlyCPU, skipCPUIfNoMkldnn, unittest.expectedFailure], name="mkldnn1d_transposed", ), subtest( ( (2, 6, 7, 8), True, False, 3, torch._mkldnn, torch._C._ConvBackend.Mkldnn, ), decorators=[onlyCPU, skipCPUIfNoMkldnn, unittest.expectedFailure], name="mkldnn2d_transposed", ), subtest( ( (2, 6, 7, 8, 9), True, False, 3, torch._mkldnn, torch._C._ConvBackend.Mkldnn, ), decorators=[onlyCPU, skipCPUIfNoMkldnn, unittest.expectedFailure], name="mkldnn3d_transposed", ), subtest( ( (2, 6, 7), False, True, 3, torch.strided, torch._C._ConvBackend.Mkldnn, ), decorators=[onlyCPU, skipCPUIfNoMkldnn], name="mkldnn1d_cpu_input", ), subtest( ( (2, 6, 7, 8), False, True, 3, torch.strided, torch._C._ConvBackend.Mkldnn, ), decorators=[onlyCPU, skipCPUIfNoMkldnn], name="mkldnn2d_cpu_input", ), subtest( ( (2, 6, 7, 8, 9), False, True, 3, torch.strided, torch._C._ConvBackend.Mkldnn, ), decorators=[onlyCPU, skipCPUIfNoMkldnn], name="mkldnn3d_cpu_input", ), subtest( ( (0, 6, 7), False, False, 3, torch._mkldnn, torch._C._ConvBackend.MkldnnEmpty, ), decorators=[onlyCPU, skipCPUIfNoMkldnn], name="mkldnn_empty_batch1d", ), subtest( ( (2, 0, 7), False, False, 3, torch._mkldnn, torch._C._ConvBackend.MkldnnEmpty, ), decorators=[onlyCPU, skipCPUIfNoMkldnn], name="mkldnn_empty_channel1d", ), subtest( ( (0, 0, 7), False, False, 3, torch._mkldnn, torch._C._ConvBackend.MkldnnEmpty, ), decorators=[onlyCPU, skipCPUIfNoMkldnn], name="mkldnn_empty_batch_channel1d", ), subtest( ( (0, 6, 7, 8), False, False, 3, torch._mkldnn, torch._C._ConvBackend.MkldnnEmpty, ), decorators=[onlyCPU, skipCPUIfNoMkldnn], name="mkldnn_empty_batch2d", ), subtest( ( (2, 0, 7, 8), False, False, 3, torch._mkldnn, torch._C._ConvBackend.MkldnnEmpty, ), decorators=[onlyCPU, skipCPUIfNoMkldnn], name="mkldnn_empty_channel2d", ), subtest( ( (0, 0, 7, 8), False, False, 3, torch._mkldnn, torch._C._ConvBackend.MkldnnEmpty, ), decorators=[onlyCPU, skipCPUIfNoMkldnn], name="mkldnn_empty_batch_channel2d", ), subtest( ( (0, 6, 7, 8, 9), False, False, 3, torch._mkldnn, torch._C._ConvBackend.MkldnnEmpty, ), decorators=[onlyCPU, skipCPUIfNoMkldnn], name="mkldnn_empty_batch3d", ), subtest( ( (2, 0, 7, 8, 9), False, False, 3, torch._mkldnn, torch._C._ConvBackend.MkldnnEmpty, ), decorators=[onlyCPU, skipCPUIfNoMkldnn], name="mkldnn_empty_channel3d", ), subtest( ( (0, 0, 7, 8, 9), False, False, 3, torch._mkldnn, torch._C._ConvBackend.MkldnnEmpty, ), decorators=[onlyCPU, skipCPUIfNoMkldnn], name="mkldnn_empty_batch_channel3d", ), # Note: Tests for mobile backends are not currently supported. This comprises # NnpackSpatial, Winograd3x3Depthwise, and Xnnpack2d backends. Testing these # requires the ability to gate tests by whether PyTorch is built with USE_MOBILE=1. ], ) # Test with both bias and no bias. @parametrize_test("has_bias", [False, True]) # Test with both stride=1 and stride>1 cases. @parametrize_test("strided", [False, True]) # Test with both contiguous and non-contiguous inputs. @parametrize_test("contiguous", [False, True]) def test_conv_backend( self, device, input_shape, has_bias, strided, contiguous, transposed, dilated, groups, layout, backend_expected, ): # Build up inputs. dtype = torch.float32 C_in, C_out, dim, kernel_size = input_shape[1], 12, len(input_shape) - 2, 3 x = torch.randn(*input_shape, device=device, dtype=dtype, requires_grad=True) weight = torch.randn( C_in if transposed else C_out, C_out // groups if transposed else C_in // groups, *[kernel_size for _ in range(dim)], device=device, dtype=dtype, requires_grad=True, ) bias = ( torch.randn(C_out, device=device, dtype=dtype, requires_grad=True) if has_bias else None ) def _make_noncontiguous(inp): if inp is None: return None old_requires_grad = inp.requires_grad inp = torch.repeat_interleave(inp, 2, dim=-1) inp = inp[..., ::2].detach().requires_grad_(old_requires_grad) return inp if not contiguous: x = _make_noncontiguous(x) weight = _make_noncontiguous(weight) bias = _make_noncontiguous(bias) if layout is torch._mkldnn: x = x.to_mkldnn() # Note that weight and bias are not supported as mkldnn tensors during training. stride = (2,) * dim if strided else (1,) * dim padding = (0,) * dim dilation = (2,) * dim if dilated else (1,) * dim output_padding = (0,) * dim inputs = [ x, weight, bias, stride, padding, dilation, transposed, output_padding, groups, ] # Ensure correct backend is selected. backend_actual = torch._C._select_conv_backend(*inputs) self.assertEqual(backend_actual, backend_expected) # Ensure backward call succeeds. convolution = torch.ops.aten.convolution output = convolution(*inputs) grad_output = torch.randn(output.shape, device=device, dtype=dtype) if not contiguous: grad_output = _make_noncontiguous(grad_output) if layout is torch._mkldnn: grad_output = grad_output.to_mkldnn() output.backward(grad_output) # mkldnn doesn't support gradcheck :( if layout is torch._mkldnn: return if backend_actual != torch._C._ConvBackend.Empty: # FIXME: forward AD fails # Forward AD and forward-over-reverse AD smoke test in float32 # TODO: remove this if we introduce per-op gradient tests for float32 with fwAD.dual_level(): dual_inputs = [ ( fwAD.make_dual(i, torch.rand_like(i)) if isinstance(i, torch.Tensor) else i ) for i in inputs ] # Forward AD output = convolution(*dual_inputs) # Forward over reverse AD grad_output_d = fwAD.make_dual( torch.rand_like(output), torch.rand_like(output) ) if has_bias: torch.autograd.grad(output, [x, weight, bias], grad_output_d) else: torch.autograd.grad(output, [x, weight], grad_output_d) # Convert to float64 for gradcheck. x = x.to(torch.float64).detach().requires_grad_(True) weight = weight.to(torch.float64).detach().requires_grad_(True) if bias is not None: bias = bias.to(torch.float64).detach().requires_grad_(True) inputs = [ x, weight, bias, stride, padding, dilation, transposed, output_padding, groups, ] # Set some backend-specific validation settings. gradcheck_nondet_tol = 0.0 if torch.backends.cudnn.is_available(): # cuDNN introduces non-determinism gradcheck_nondet_tol = GRADCHECK_NONDET_TOL self.assertTrue(gradcheck(convolution, inputs, nondet_tol=gradcheck_nondet_tol)) # double backward doesn't support bias gradients if bias is not None: bias.requires_grad_(False) self.assertTrue( gradgradcheck(convolution, inputs, nondet_tol=gradcheck_nondet_tol) ) @onlyCPU def test_conv_contiguous_for_oneDNN(self): # See https://github.com/pytorch/pytorch/issues/80837. for dtype in [torch.float, torch.bfloat16, torch.half]: conv = nn.Conv2d( 1, 128, kernel_size=(5, 2), stride=(2, 1), padding=(0, 1), dilation=(1, 1), groups=1, bias=True, padding_mode="zeros", ).to(dtype=dtype) x = torch.rand([1, 2, 321, 201, 1]).to(dtype=dtype) x = torch.transpose(x, 1, 4) x2 = x[..., 0] inputs = [ x2, conv.weight, conv.bias, (2, 1), (0, 1), (1, 1), False, (0, 1), 1, ] if torch.backends.mkldnn.is_available(): y = conv(x2) # Disable MKLDNN explicitly with torch.backends.mkldnn.flags(enabled=False): y_ = conv(x2) self.assertEqual(y, y_) @onlyCPU def test_conv_ic1_channels_last_for_oneDNN(self): # See https://github.com/pytorch/pytorch/issues/82060, N > 1 will call in OneDNN path. for dtype in [torch.float, torch.bfloat16, torch.half]: conv = torch.nn.Conv2d( 1, 64, kernel_size=(3, 3), padding=(1, 1), bias=False ) conv = conv.to(memory_format=torch.channels_last).to(dtype=dtype) x = torch.rand(2, 1, 100, 100).to(dtype=dtype) if torch.backends.mkldnn.is_available(): y = conv(x) # Disable MKLDNN explicitly with torch.backends.mkldnn.flags(enabled=False): y_ = conv(x) self.assertEqual(y, y_) @dtypes(torch.float, torch.cfloat) def test_conv_empty_channel(self, device, dtype): in_channels = 0 mod = torch.nn.Conv1d(in_channels, 8, 2, stride=2, dtype=dtype).to(device) inp = torch.randn(2, 0, 15, device=device, dtype=dtype) _test_module_empty_input(self, mod, inp, check_size=False) with self.assertRaisesRegex(RuntimeError, "Given groups=1, weight"): inp = torch.randn(2, 1, 0, device=device, dtype=dtype) mod(inp) mod = torch.nn.Conv2d(in_channels, 33, 3, stride=2, dtype=dtype).to(device) inp = torch.randn(2, 0, 50, 100, device=device, dtype=dtype) _test_module_empty_input(self, mod, inp, check_size=False) with self.assertRaisesRegex(RuntimeError, "Given groups=1, weight"): inp = torch.randn(2, 1, 40, 0, device=device, dtype=dtype) mod(inp) mod = torch.nn.Conv3d(in_channels, 33, 3, stride=2, dtype=dtype).to(device) inp = torch.randn(2, 0, 50, 20, 40, device=device, dtype=dtype) _test_module_empty_input(self, mod, inp, check_size=False) with self.assertRaisesRegex(RuntimeError, "Given groups=1, weight"): inp = torch.randn(2, 1, 50, 0, 40, device=device, dtype=dtype) mod(inp) def test_group_conv_empty(self, device): mod = torch.nn.Conv2d(4, 4, stride=2, kernel_size=3, padding=1, groups=4).to( device ) inp = torch.randn(0, 4, 4, 4, device=device) _test_module_empty_input(self, mod, inp, check_size=False) if self.device_type == "cuda" and self.has_cudnn(): with torch.backends.cudnn.flags(enabled=False): _test_module_empty_input(self, mod, inp, check_size=False) def test_group_convTranspose_empty(self, device): mod = torch.nn.ConvTranspose2d( 4, 4, stride=2, kernel_size=3, padding=1, groups=4 ).to(device) inp = torch.randn(0, 4, 4, 4, device=device) _test_module_empty_input(self, mod, inp, check_size=False) if self.device_type == "cuda" and self.has_cudnn(): with torch.backends.cudnn.flags(enabled=False): _test_module_empty_input(self, mod, inp, check_size=False) def test_convTranspose_empty(self, device): mod = torch.nn.ConvTranspose2d(4, 4, stride=2, kernel_size=3, padding=1).to( device ) inp = torch.randn(0, 4, 4, 4, device=device) _test_module_empty_input(self, mod, inp, check_size=False) if self.device_type == "cuda" and self.has_cudnn(): with torch.backends.cudnn.flags(enabled=False): _test_module_empty_input(self, mod, inp, check_size=False) @onlyCUDA @largeTensorTest("12GB") def test_conv_large_nosplit(self, device): # Here we just test the convolution correctly route to the fallback implementation # that is, it does not crash. The correctness of fallback implementation should be # covered in other tests dtype = torch.half if self.device_type == "cuda" else torch.float conv1 = nn.Conv2d(2, 2, 8, 8).to(device).to(dtype) input_large = torch.randn(1, 2, 1024, 1024 * 1024, dtype=dtype, device=device) conv1(input_large) conv2 = torch.nn.Conv2d(1, 1024, 1, 1).to(device).to(dtype) input_large = torch.randn(1, 1, 2048, 1024, dtype=dtype, device=device) conv2(input_large) def test_conv_noncontig_weights(self, device): for dim in (1, 2, 3): for grouped in (False, True): nc = 3 groups = 3 if grouped else 1 w = torch.randn([3] * dim, device=device) w = w.expand([nc, int(nc / groups)] + list(w.shape)) w = w.detach().requires_grad_() x = torch.randn( [1, nc] + ([5] * dim), device=device, requires_grad=True ) y = getattr(F, f"conv{dim}d")(x, w, groups=groups) y.sum().backward() y = getattr(F, f"conv_transpose{dim}d")(x, w, groups=groups) y.sum().backward() def test_conv_noncontig_weights_and_bias(self, device): # need floats to exercise https://github.com/pytorch/pytorch/issues/16018 for bias in [True, False]: conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=bias).to( device, torch.float ) input_nc = torch.randn( (1, 3, 224, 224, 2), device=device, dtype=torch.float )[:, :, :, :, 1] input_c = input_nc.contiguous() weight_nc = torch.randn((64, 3, 7, 7, 2), device=device, dtype=torch.float)[ :, :, :, :, 1 ] conv1.weight = nn.Parameter(weight_nc) weight_c = conv1.weight.contiguous() if bias: bias_nc = torch.randn((64, 2), device=device, dtype=torch.float)[:, 1] conv1.bias = nn.Parameter(bias_nc) bias_c = conv1.bias.contiguous() out1 = conv1(input_nc) conv1.weight = nn.Parameter(weight_c) if bias: conv1.bias = nn.Parameter(bias_c) out2 = conv1(input_c) self.assertEqual(out1, out2) @onlyCUDA @largeTensorTest("12GB") @skipIfRocmVersionLessThan((6, 0)) def test_conv_transposed_large(self, device): dtype = torch.half if self.device_type == "cuda" else torch.float conv = nn.ConvTranspose2d(1, 1, 1, 1, bias=False).to(device).to(dtype) input_large = torch.randn(4096, 1, 512, 1024, dtype=dtype, device=device) # forward ret = conv(input_large) maxdiff0 = ( (ret.narrow(0, 0, 1024) - conv(input_large.narrow(0, 0, 1024))) .abs_() .max() .item() ) maxdiff1 = ( (ret.narrow(0, 1024, 1024) - conv(input_large.narrow(0, 1024, 1024))) .abs_() .max() .item() ) maxdiff2 = ( (ret.narrow(0, 2048, 1024) - conv(input_large.narrow(0, 2048, 1024))) .abs_() .max() .item() ) maxdiff3 = ( (ret.narrow(0, 3072, 1024) - conv(input_large.narrow(0, 3072, 1024))) .abs_() .max() .item() ) if self.device_type == "cuda": # cuDNN may use algorithms such as FFT that don't guarantee a diff of 0 self.assertEqual(maxdiff0, 0, atol=2e-3, rtol=1e-5) self.assertEqual(maxdiff1, 0, atol=2e-3, rtol=1e-5) self.assertEqual(maxdiff2, 0, atol=2e-3, rtol=1e-5) self.assertEqual(maxdiff3, 0, atol=2e-3, rtol=1e-5) else: self.assertEqual(maxdiff0, 0) self.assertEqual(maxdiff1, 0) self.assertEqual(maxdiff2, 0) self.assertEqual(maxdiff3, 0) @onlyCUDA @skipCUDAIfRocm @largeTensorTest("12GB") def test_conv_large(self, device): dtype = torch.half if self.device_type == "cuda" else torch.float conv = nn.Conv2d(2, 2, 8, 8, bias=False).to(device).to(dtype) input_large = torch.randn(4097, 2, 512, 512, dtype=dtype, device=device) # forward ret = conv(input_large) self.assertEqual(ret[:2048], conv(input_large[:2048])) self.assertEqual(ret[2048:4096], conv(input_large[2048:4096])) self.assertEqual(ret[4096:], conv(input_large[4096:])) # backward conv.zero_grad() # When computing the backward, we are using the `max(dim=1)`` to create # some sparsity. Without this sparsity, the rounding error would be # too large (as large as 1e-5) to satisfy the creterion (1e-6) of `assertEqual` ret.view(4097, -1).max(dim=1).values.sum().backward() del ret grad1 = conv.weight.grad.detach().clone() conv.zero_grad() conv(input_large[:2048]).view(2048, -1).max(dim=1).values.sum().backward() conv(input_large[2048:4096]).view(2048, -1).max(dim=1).values.sum().backward() conv(input_large[4096:]).view(1, -1).max(dim=1).values.sum().backward() grad2 = conv.weight.grad.detach().clone() # gradients are at the order of hundreds, we need to scale it to # the order of one so that we can compare scale = 1 / grad2.abs().mean() grad1 = grad1 * scale grad2 = grad2 * scale self.assertEqual(grad1, grad2, atol=5e-2, rtol=5e-3) @onlyCUDA @skipCUDAIfRocm @largeTensorTest("20GB", "cpu") @largeTensorTest("60GB", "cuda") def test_conv_large_batch_1(self, device): in_channels = 514 dim = 2048 out_channels = 1 kernel_size = 3 stride = 1 padding = 1 input_tensor = torch.ones(1, in_channels, dim, dim).cuda().half() model = ( nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding) .cuda() .half() ) output = model(input_tensor) model_cpu = model.cpu().float() output_cpu = model(input_tensor.float().cpu()) self.assertEqual(output.cpu().float(), output_cpu, atol=1e-3, rtol=1e-3) @onlyCUDA @skipCUDAIfRocm @largeTensorTest("24GB", "cpu") @largeTensorTest("20GB", "cuda") def test_conv3d_large_batch_1(self, device): x = torch.rand(1, 32, 512, 512, 256) m = torch.nn.Conv3d(32, 1, kernel_size=1, padding=0, stride=1, bias=False) yref = m(x) y = m.to(device=device)(x.to(device=device)) self.assertEqual(yref, y.cpu()) @onlyCUDA @skipCUDAIfNoCudnn def test_contig_wrong_stride_cudnn(self, device): # x has to have batch_size 1 to test contiguous checks x = torch.randn(1, 16, 5, 5, device=device) stride = list(x.stride()) stride[0] = 20 # change the stride in dimension 0. the tensor is still contiguous because size[0] is 1 x.set_(x.storage(), 0, x.size(), stride) self.assertTrue(x.is_contiguous()) F.conv_transpose2d(x, torch.randn(16, 1, 1, 1, device=device)) F.conv2d(x, torch.randn(1, 16, 1, 1, device=device)) @onlyCUDA @tf32_on_and_off(0.005) def test_Conv2d_size_1_kernel(self, device): x_cpu = torch.randn(2, 3, 5, 5) conv_cpu = torch.nn.Conv2d(3, 3, kernel_size=1) y_cpu = conv_cpu(x_cpu) y = torch.rand_like(y_cpu) y_cpu.backward(y) with cudnn.flags(enabled=False): conv_cuda = torch.nn.Conv2d(3, 3, kernel_size=1).to(device) conv_cuda.bias.data.copy_(conv_cpu.bias.data) conv_cuda.weight.data.copy_(conv_cpu.weight.data) y_cuda = conv_cuda(x_cpu.to(device)) y_cuda.backward(y.to(device)) self.assertEqual(y_cpu, y_cuda, atol=1e-5, rtol=0, exact_device=False) self.assertEqual( conv_cpu.bias.grad.data, conv_cuda.bias.grad.data, atol=1e-5, rtol=0, exact_device=False, ) self.assertEqual( conv_cpu.weight.grad.data, conv_cuda.weight.grad.data, atol=1e-5, rtol=0, exact_device=False, ) @onlyCUDA @tf32_on_and_off(0.005) def test_ConvTranspose2d_size_1_kernel(self, device): x_cpu = torch.randn(2, 3, 5, 5) conv_cpu = torch.nn.ConvTranspose2d(3, 3, kernel_size=1) y_cpu = conv_cpu(x_cpu) y = torch.rand_like(y_cpu) y_cpu.backward(y) with cudnn.flags(enabled=False): conv_cuda = torch.nn.ConvTranspose2d(3, 3, kernel_size=1).to(device) conv_cuda.bias.data.copy_(conv_cpu.bias.data) conv_cuda.weight.data.copy_(conv_cpu.weight.data) y_cuda = conv_cuda(x_cpu.to(device)) y_cuda.backward(y.to(device)) self.assertEqual(y_cpu, y_cuda, atol=1e-5, rtol=0, exact_device=False) self.assertEqual( conv_cpu.bias.grad.data, conv_cuda.bias.grad.data, atol=1e-5, rtol=0, exact_device=False, ) self.assertEqual( conv_cpu.weight.grad.data, conv_cuda.weight.grad.data, atol=1e-5, rtol=0, exact_device=False, ) @onlyCUDA def test_ConvTranspose3d_size_1_kernel(self, device): with set_default_dtype(torch.double): x_cpu = torch.randn(2, 3, 3, 5, 5) conv_cpu = torch.nn.ConvTranspose3d(3, 3, kernel_size=1) y_cpu = conv_cpu(x_cpu) y = torch.rand_like(y_cpu) y_cpu.backward(y) with cudnn.flags(enabled=False): conv_cuda = torch.nn.ConvTranspose3d(3, 3, kernel_size=1).to(device) conv_cuda.bias.data.copy_(conv_cpu.bias.data) conv_cuda.weight.data.copy_(conv_cpu.weight.data) y_cuda = conv_cuda(x_cpu.to(device)) y_cuda.backward(y.to(device)) self.assertEqual(y_cpu, y_cuda, atol=1e-5, rtol=0, exact_device=False) self.assertEqual( conv_cpu.bias.grad.data, conv_cuda.bias.grad.data, atol=1e-5, rtol=0, exact_device=False, ) self.assertEqual( conv_cpu.weight.grad.data, conv_cuda.weight.grad.data, atol=1e-5, rtol=0, exact_device=False, ) @dtypesIfCUDA( *floating_types_and(torch.half, *[torch.bfloat16] if AMPERE_OR_ROCM else []) ) @dtypes(torch.float) @torch.backends.cudnn.flags(enabled=True, benchmark=False) @unittest.skipIf(TEST_WITH_ROCM, "Skipped on ROCm, since it is failing on ROCm 5.7") def test_Conv2d_naive_groups(self, device, dtype): # Check that grouped convolutions matches two half convolutions m = nn.Conv2d(4, 4, kernel_size=3, groups=2).to(device, dtype) i = torch.randn(2, 4, 6, 6, device=device, dtype=dtype, requires_grad=True) output = m(i) grad_output = torch.randn(2, 4, 4, 4, device=device, dtype=dtype) output.backward(grad_output) m1 = nn.Conv2d(2, 2, kernel_size=3).to(device, dtype) m1.weight.data.copy_(m.weight.data[:2]) m1.bias.data.copy_(m.bias.data[:2]) i1 = i.data[:, :2].contiguous().requires_grad_(True) output1 = m1(i1) output1.backward(grad_output[:, :2].contiguous()) m2 = nn.Conv2d(2, 2, kernel_size=3).to(device, dtype) m2.weight.data.copy_(m.weight.data[2:]) m2.bias.data.copy_(m.bias.data[2:]) i2 = i.data[:, 2:].contiguous().requires_grad_(True) output2 = m2(i2) output2.backward(grad_output[:, 2:].contiguous()) self.assertEqual(output, torch.cat([output1, output2], 1)) self.assertEqual( i.grad.data, torch.cat([i1.grad.data, i2.grad.data], 1), atol=dtype2prec_DONTUSE[dtype], rtol=0, ) self.assertEqual( m.bias.grad.data, torch.cat([m1.bias.grad.data, m2.bias.grad.data], 0), atol=dtype2prec_DONTUSE[dtype], rtol=0, ) self.assertEqual( m.weight.grad.data, torch.cat([m1.weight.grad.data, m2.weight.grad.data], 0), atol=dtype2prec_DONTUSE[dtype], rtol=0, ) @dtypes(torch.double, torch.cdouble) def test_Conv2d_backward_depthwise(self, device, dtype): x = torch.randn(2, 2, 4, 20, device=device, dtype=dtype, requires_grad=True) weight = torch.randn(2, 1, 3, 5, device=device, dtype=dtype, requires_grad=True) def conv2d_depthwise(x, weight): return torch.nn.functional.conv2d( x, weight, bias=None, stride=(1, 10), groups=2 ) for cudnn_enabled in [False, True]: with torch.backends.cudnn.flags(enabled=cudnn_enabled): torch.autograd.gradcheck(conv2d_depthwise, (x, weight)) @onlyCPU @dtypes(torch.float, torch.double) def test_conv_thnn_nhwc(self, device, dtype): def helper( mod, n, c, h, w, out_channels, kernel_size, dilation, groups, input_format, weight_format, ): input = torch.randint(-3, 3, (n, c, h, w), dtype=dtype, device=device).to( memory_format=input_format ) input.requires_grad_() conv = mod( c, out_channels, kernel_size, dilation=dilation, groups=groups ).to(device="cpu", dtype=dtype, memory_format=weight_format) for p in conv.parameters(): p.data = torch.randint_like(p, -3, 3) ref_input = input.detach().clone().contiguous().requires_grad_() ref_conv = mod( c, out_channels, kernel_size, dilation=dilation, groups=groups ) # load_state_dict will restore the stride & memory_layout on ref_conv.weight. ref_conv.load_state_dict(conv.state_dict()) ref_conv = ref_conv.to( device="cpu", dtype=dtype, memory_format=torch.contiguous_format ) out = conv(input) ref_out = ref_conv(ref_input) grad = torch.randint_like(out, -3, 3) ref_grad = grad.detach().clone().contiguous() out.backward(grad) ref_out.backward(ref_grad) self.assertTrue(out.is_contiguous(memory_format=torch.channels_last)) self.assertTrue(ref_out.is_contiguous()) self.assertEqual(out, ref_out, exact_dtype=False) self.assertEqual(conv.weight.grad, ref_conv.weight.grad, exact_dtype=False) self.assertEqual(conv.bias.grad, ref_conv.bias.grad, exact_dtype=False) self.assertEqual(input.grad, ref_input.grad, exact_dtype=False) with torch.backends.mkldnn.flags(enabled=False): formats = [ [torch.channels_last, torch.channels_last], [torch.channels_last, torch.contiguous_format], [torch.contiguous_format, torch.channels_last], ] for input_format, weight_format in formats: # non-dilated conv: thnn_conv2d normal path (with im2col) helper( nn.Conv2d, 2, 8, 4, 4, out_channels=4, kernel_size=3, dilation=1, groups=1, input_format=input_format, weight_format=weight_format, ) helper( nn.Conv2d, 2, 8, 4, 4, out_channels=8, kernel_size=3, dilation=1, groups=8, input_format=input_format, weight_format=weight_format, ) # test when input chanels is 1 and not converted to channels last helper( nn.Conv2d, 2, 1, 10, 10, out_channels=8, kernel_size=3, dilation=1, groups=1, input_format=torch.contiguous_format, weight_format=torch.channels_last, ) # non-dilated conv: thnn_conv2d fast path (skip im2col) helper( nn.Conv2d, 1, 16, 56, 56, out_channels=16, kernel_size=1, dilation=1, groups=1, input_format=input_format, weight_format=weight_format, ) # ic == oc == 1 here, so need to stick input to CL to activate channels last helper( nn.Conv2d, 1, 16, 56, 56, out_channels=16, kernel_size=1, dilation=1, groups=16, input_format=torch.channels_last, weight_format=weight_format, ) # dilated conv: slow_conv_dilated2d helper( nn.Conv2d, 2, 8, 11, 13, out_channels=16, kernel_size=3, dilation=2, groups=1, input_format=input_format, weight_format=weight_format, ) helper( nn.Conv2d, 2, 16, 11, 13, out_channels=32, kernel_size=3, dilation=2, groups=16, input_format=input_format, weight_format=weight_format, ) # transposed-conv: slow_conv_transpose2d helper( nn.ConvTranspose2d, 2, 8, 4, 4, out_channels=4, kernel_size=3, dilation=1, groups=1, input_format=input_format, weight_format=weight_format, ) helper( nn.ConvTranspose2d, 2, 8, 4, 4, out_channels=8, kernel_size=3, dilation=1, groups=8, input_format=input_format, weight_format=weight_format, ) helper( nn.ConvTranspose2d, 1, 16, 56, 56, out_channels=16, kernel_size=1, dilation=1, groups=1, input_format=input_format, weight_format=weight_format, ) helper( nn.ConvTranspose2d, 1, 16, 56, 56, out_channels=32, kernel_size=1, dilation=1, groups=16, input_format=input_format, weight_format=weight_format, ) @onlyCUDA @skipCUDAIfRocmVersionLessThan((4, 3)) @skipCUDAIfNotMiopenSuggestNHWC @skipCUDAIfCudnnVersionLessThan(7603) @dtypes(torch.half, torch.float, torch.cfloat) def test_conv_cudnn_nhwc(self, device, dtype): def helper(n, c, h, w, out_channels, kernel_size, groups): input = torch.randint(-3, 3, (n, c, h, w), dtype=dtype, device=device).to( memory_format=torch.channels_last ) input.requires_grad_() conv = nn.Conv2d(c, out_channels, kernel_size, groups=groups).to( device="cuda", dtype=dtype, memory_format=torch.channels_last ) for p in conv.parameters(): p.data = torch.randint_like(p, -3, 3) # use FP64 channels-first conv as reference ref_input = input.detach().clone().contiguous().double().requires_grad_() ref_conv = nn.Conv2d(c, out_channels, kernel_size, groups=groups) # load_state_dict will restore the stride & memory_layout on ref_conv.weight. ref_conv.load_state_dict(conv.state_dict()) ref_conv = ref_conv.to( device="cuda", dtype=torch.double, memory_format=torch.contiguous_format ) out = conv(input) ref_out = ref_conv(ref_input) grad = torch.randint_like(out, -3, 3) ref_grad = grad.detach().clone().double().contiguous() out.backward(grad) ref_out.backward(ref_grad) self.assertTrue(out.is_contiguous(memory_format=torch.channels_last)) self.assertTrue(input.grad.is_contiguous(memory_format=torch.channels_last)) self.assertTrue( conv.weight.grad.is_contiguous(memory_format=torch.channels_last) ) self.assertTrue(ref_out.is_contiguous()) self.assertTrue(ref_input.grad.is_contiguous()) self.assertTrue(ref_conv.weight.grad.is_contiguous()) self.assertEqual(out, ref_out, exact_dtype=False) self.assertEqual(conv.weight.grad, ref_conv.weight.grad, exact_dtype=False) self.assertEqual(conv.bias.grad, ref_conv.bias.grad, exact_dtype=False) self.assertEqual(input.grad, ref_input.grad, exact_dtype=False) helper(2, 8, 4, 4, out_channels=4, kernel_size=3, groups=1) helper(2, 8, 4, 4, out_channels=8, kernel_size=3, groups=8) helper(1, 16, 56, 56, out_channels=16, kernel_size=3, groups=1) helper(1, 16, 56, 56, out_channels=16, kernel_size=3, groups=16) @onlyCUDA @skipCUDAIfRocm @skipCUDAIfCudnnVersionLessThan(8005) @dtypes(torch.half, torch.float) def test_conv_cudnn_ndhwc(self, device, dtype): def helper(n, c, d, h, w, out_channels, kernel_size, groups): input = torch.randint( -2, 2, (n, c, d, h, w), dtype=dtype, device=device ).to(memory_format=torch.channels_last_3d) input.requires_grad_() conv = nn.Conv3d(c, out_channels, kernel_size, groups=groups).to( device="cuda", dtype=dtype, memory_format=torch.channels_last_3d ) for p in conv.parameters(): p.data = torch.randint_like(p, -2, 2) # use FP64 channels-first conv as reference ref_input = input.detach().clone().contiguous().double().requires_grad_() ref_conv = nn.Conv3d(c, out_channels, kernel_size, groups=groups) # load_state_dict will restore the stride & memory_layout on ref_conv.weight. ref_conv.load_state_dict(conv.state_dict()) ref_conv = ref_conv.to( device="cuda", dtype=torch.double, memory_format=torch.contiguous_format ) out = conv(input) ref_out = ref_conv(ref_input) grad = torch.randint_like(out, -2, 2) ref_grad = grad.detach().clone().double().contiguous() out.backward(grad) ref_out.backward(ref_grad) self.assertTrue(out.is_contiguous(memory_format=torch.channels_last_3d)) self.assertTrue( input.grad.is_contiguous(memory_format=torch.channels_last_3d) ) self.assertTrue( conv.weight.grad.is_contiguous(memory_format=torch.channels_last_3d) ) self.assertTrue(ref_out.is_contiguous()) self.assertTrue(ref_input.grad.is_contiguous()) self.assertTrue(ref_conv.weight.grad.is_contiguous()) self.assertEqual(out, ref_out, exact_dtype=False) self.assertEqual(conv.weight.grad, ref_conv.weight.grad, exact_dtype=False) self.assertEqual(conv.bias.grad, ref_conv.bias.grad, exact_dtype=False) self.assertEqual(input.grad, ref_input.grad, exact_dtype=False) helper(2, 8, 4, 4, 4, out_channels=4, kernel_size=3, groups=1) helper(2, 8, 4, 4, 4, out_channels=8, kernel_size=3, groups=8) helper(1, 16, 18, 18, 18, out_channels=16, kernel_size=3, groups=1) helper(1, 16, 18, 18, 18, out_channels=16, kernel_size=3, groups=16) def _run_conv( self, layer, device, inp, grad, ref_conv, ref_input, ref_out, input_format, weight_format, grad_format, output_format, ): conv = ( layer(inp.size(1), grad.size(1), ref_conv.weight.size(2)).float().to(device) ) # load_state_dict will restore the stride & memory_layout on ref_conv.weight. conv.load_state_dict(ref_conv.state_dict()) weight_data = ( conv.weight.detach().clone().contiguous(memory_format=weight_format) ) conv.weight.data = weight_data.resize_( weight_data.size(), memory_format=weight_format ) input = inp.clone().contiguous(memory_format=input_format) input.resize_(input.size(), memory_format=input_format) input = input.requires_grad_() grad = grad.contiguous(memory_format=grad_format) grad.resize_(grad.size(), memory_format=grad_format) out = conv(input) out.backward(grad) self.assertTrue(out.is_contiguous(memory_format=output_format)) self.assertEqual(out, ref_out) self.assertEqual(conv.weight.grad, ref_conv.weight.grad) self.assertEqual(conv.bias.grad, ref_conv.bias.grad) self.assertEqual(input.grad, ref_input.grad) def _test_conv_cudnn_nhwc_nchw(self, layer, n, c, h, w, k, filter_size, device): data = torch.randint(1, 10, (n, c, h, w), dtype=torch.float32, device=device) ref_input = data.clone().contiguous().requires_grad_(True) ref_conv = layer(c, k, filter_size).float().to(device) ref_out = ref_conv(ref_input) grad = torch.randint(1, 10, ref_out.size(), dtype=torch.float32, device="cuda") ref_out.backward(grad) for w_f in [torch.contiguous_format, torch.channels_last]: for g_f in [torch.contiguous_format, torch.channels_last]: for input_format in [torch.contiguous_format, torch.channels_last]: output_format = torch.contiguous_format # Older versions of CudNN have Channels Last support disabled if torch.backends.cudnn.version() >= 7603: if input_format == torch.channels_last: output_format = torch.channels_last # This is because we have N111 weight that cannot handle # the ambiguous memory_format if w_f == torch.channels_last: if layer == nn.Conv2d and filter_size * c != 1: output_format = torch.channels_last if layer == nn.ConvTranspose2d and filter_size * k != 1: output_format = torch.channels_last self._run_conv( layer, device, data, grad, ref_conv, ref_input, ref_out, input_format, w_f, g_f, output_format, ) @onlyCUDA @skipCUDAIfRocmVersionLessThan((4, 3)) @skipCUDAIfNotMiopenSuggestNHWC @skipCUDAIfCudnnVersionLessThan(7603) @tf32_on_and_off(0.05) def test_conv_cudnn_mismatch_memory_format(self, device): configs = [ [4, 2, 8, 8, 4, 2], [4, 1, 8, 8, 4, 2], [1, 1, 8, 8, 4, 2], [4, 2, 2, 8, 4, 1], [4, 2, 1, 8, 4, 1], [4, 2, 8, 8, 4, 1], [4, 1, 8, 8, 4, 1], ] for n, c, h, w, k, filter_size in configs: self._test_conv_cudnn_nhwc_nchw( nn.Conv2d, n, c, h, w, k, filter_size, device ) self._test_conv_cudnn_nhwc_nchw( nn.ConvTranspose2d, n, c, h, w, k, filter_size, device ) # torch.half is erroring out on Windows with CUDA 10.1 + cuDNN 7.6.4 # returning CUDNN_STATUS_BAD_PARAM # Disabling that specific test for now [see issue # 33918] @onlyCUDA @skipCUDAIfNoCudnn @dtypes(torch.float, torch.double) def test_conv_cudnn_nhwc_support(self, device, dtype): input = torch.randn( (1, 16, 1, 1), dtype=dtype, device="cuda", requires_grad=True ) weight = torch.randn( (8, 16, 3, 3), dtype=dtype, device="cuda", requires_grad=True ) weight = weight.to(memory_format=torch.channels_last) o = torch.conv2d(input, weight, None, (2, 1), (1, 1), (1, 1), 1) self.assertTrue(o.is_contiguous(memory_format=torch.channels_last)) o.sum().backward() # Test that faster algorithms used for inference produce the same results # Validates depthwise3x3 bug reported in https://github.com/pytorch/pytorch/issues/60176 @onlyCPU @dtypes(torch.float) def test_conv2d_no_grad(self, device, dtype): for batch in [1, 2, 3]: for groups in [1, 2, 4]: input = torch.rand(batch, groups, 8, 8, dtype=dtype, device=device) m = nn.Conv2d( groups, 8, kernel_size=(3, 3), groups=groups, dtype=dtype, device=device, ) with torch.no_grad(): output_ng = m(input) output = m(input) self.assertEqual(output, output_ng, rtol=1e-2, atol=1e-5) @onlyCUDA @skipCUDAIfNoCudnn @dtypes(torch.float, torch.float16) @precisionOverride({torch.half: 0.002, torch.float: 1e-4}) def test_cudnn_convolution_relu(self, device, dtype): for batch, groups, image_size, kernel_size, memory_format in product( (1, 2, 3), (1, 2, 4), ((1, 1), (8, 8)), ((1, 1), (3, 3)), (torch.channels_last, torch.contiguous_format), ): if image_size[0] < kernel_size[0]: continue inp = torch.rand(batch, groups, *image_size, dtype=dtype, device=device) w = torch.randn(8, groups, *kernel_size, dtype=dtype, device=device) conv2d_out = torch.conv2d(inp, w, None, (1, 1), (0, 0), (1, 1), 1) inp = inp.to(memory_format=memory_format) w = w.to(memory_format=memory_format) if torch.version.hip: cudnn_out = torch.miopen_convolution_relu( inp, w, None, (1, 1), (0, 0), (1, 1), 1 ) else: cudnn_out = torch.cudnn_convolution_relu( inp, w, None, (1, 1), (0, 0), (1, 1), 1 ) self.assertTrue(cudnn_out.is_contiguous(memory_format=memory_format)) if tf32_is_not_fp32() and dtype == torch.float: self.assertEqual(conv2d_out.relu(), cudnn_out, atol=4e-3, rtol=0.006) else: self.assertEqual(conv2d_out.relu(), cudnn_out) @onlyCUDA @skipCUDAIfNoCudnn @dtypes(torch.float, torch.float16) @precisionOverride({torch.half: 0.002, torch.float: 1e-4}) def test_cudnn_convolution_add_relu(self, device, dtype): for batch, groups, image_size, kernel_size, memory_format in product( (1, 2, 3), (1, 2, 4), ((1, 1), (8, 8)), ((1, 1), (3, 3)), (torch.channels_last, torch.contiguous_format), ): if image_size[0] < kernel_size[0]: continue inp = torch.rand(batch, groups, *image_size, dtype=dtype, device=device) w = torch.randn(8, groups, *kernel_size, dtype=dtype, device=device) conv2d_out = torch.conv2d(inp, w, None, (1, 1), (0, 0), (1, 1), 1) alpha = 2.0 z = torch.randn_like(conv2d_out) inp = inp.to(memory_format=memory_format) w = w.to(memory_format=memory_format) z = z.to(memory_format=memory_format) if torch.version.hip: cudnn_out = torch.miopen_convolution_add_relu( inp, w, z, alpha, None, (1, 1), (0, 0), (1, 1), 1 ) else: cudnn_out = torch.cudnn_convolution_add_relu( inp, w, z, alpha, None, (1, 1), (0, 0), (1, 1), 1 ) self.assertTrue(cudnn_out.is_contiguous(memory_format=memory_format)) if tf32_is_not_fp32() and dtype == torch.float: self.assertEqual( F.relu(conv2d_out + alpha * z), cudnn_out, atol=2e-3, rtol=0.006 ) else: self.assertEqual(F.relu(conv2d_out + alpha * z), cudnn_out) @onlyCUDA @skipCUDAIfRocm @skipCUDAIfCudnnVersionLessThan(7603) def test_convert_conv2d_weight_memory_format(self, device): input = torch.randint(1, 10, (2, 8, 4, 4), dtype=torch.float32, device=device) model = nn.Sequential(nn.Conv2d(8, 4, 3), nn.BatchNorm2d(4)).to(device).float() for memory_format in [torch.channels_last, torch.contiguous_format]: model = nn.utils.convert_conv2d_weight_memory_format(model, memory_format) out = model(input) self.assertTrue(out.is_contiguous(memory_format=memory_format)) model = ( nn.Sequential(nn.ConvTranspose2d(8, 4, 3), nn.BatchNorm2d(4)) .to(device) .float() ) for memory_format in [torch.channels_last, torch.contiguous_format]: model = nn.utils.convert_conv2d_weight_memory_format(model, memory_format) out = model(input) self.assertTrue(out.is_contiguous(memory_format=memory_format)) @onlyCUDA @skipCUDAIfRocm @skipCUDAIfCudnnVersionLessThan(7603) def test_convert_conv3d_weight_memory_format(self, device): input = torch.randint( 1, 10, (2, 8, 4, 4, 4), dtype=torch.float32, device=device ) model = ( nn.Sequential(nn.ConvTranspose3d(8, 4, 3), nn.BatchNorm3d(4)) .to(device) .float() ) for memory_format in [torch.channels_last_3d, torch.contiguous_format]: model = nn.utils.convert_conv3d_weight_memory_format(model, memory_format) out = model(input) self.assertTrue(out.is_contiguous(memory_format=memory_format)) def test_conv_double_backward_strided_with_3D_input_and_weight(self, device): # Test that _convolution_double_backward() outputs the correct grad shapes # for 3D input / weight when stride > 1. This is an ad-hoc regression test for a # specific case that was uncovered during the convolution consolidation effort. # The test can be safely deleted if _convolution_double_backward() is removed. input = torch.randn(2, 3, 6, device=device) weight = torch.randn(3, 3, 3, device=device) bias = torch.randn(3, device=device) stride = (2,) padding = (1,) dilation = (1,) transposed = False output_padding = (0,) groups = 1 output = torch.ops.aten.convolution( input, weight, bias, stride, padding, dilation, transposed, output_padding, groups, ) ggI = torch.randn(input.shape, device=device) ggW = torch.randn(weight.shape, device=device) ggB = torch.randn(bias.shape, device=device) gO = torch.randn(output.shape, device=device) output_mask = [True, True, True] ( grad_grad_output, grad_input, grad_weight, ) = torch.ops.aten._convolution_double_backward( ggI, ggW, ggB, gO, weight, input, stride, padding, dilation, transposed, output_padding, groups, output_mask, ) # Make sure the correct shapes are computed. self.assertEqual(grad_grad_output.shape, gO.shape) self.assertEqual(grad_input.shape, input.shape) self.assertEqual(grad_weight.shape, weight.shape) @onlyCUDA @largeTensorTest("40GB") @largeTensorTest("24GB", "cpu") def test_conv3d_64bit_indexing(self, device): x = torch.rand(1, 32, 512, 512, 256) m = torch.nn.Conv3d(32, 1, kernel_size=1, padding=0, stride=1, bias=False) yref = m(x) y = m.to(device=device)(x.to(device=device)) self.assertEqual(yref, y) instantiate_device_type_tests(TestConvolutionNNDeviceType, globals()) instantiate_parametrized_tests(TestConvolutionNN) if __name__ == "__main__": run_tests()