#include <torch/csrc/THP.h>
#include <torch/csrc/utils/tensor_numpy.h>
#define WITH_NUMPY_IMPORT_ARRAY
#include <c10/util/irange.h>
#include <torch/csrc/utils/numpy_stub.h>

#ifndef USE_NUMPY

namespace torch::utils {
PyObject* tensor_to_numpy(const at::Tensor&, bool) {
  throw std::runtime_error("PyTorch was compiled without NumPy support");
}
at::Tensor tensor_from_numpy(
    PyObject* obj,
    bool warn_if_not_writeable /*=true*/) {
  throw std::runtime_error("PyTorch was compiled without NumPy support");
}

bool is_numpy_available() {
  throw std::runtime_error("PyTorch was compiled without NumPy support");
}

bool is_numpy_int(PyObject* obj) {
  throw std::runtime_error("PyTorch was compiled without NumPy support");
}
bool is_numpy_scalar(PyObject* obj) {
  throw std::runtime_error("PyTorch was compiled without NumPy support");
}
at::Tensor tensor_from_cuda_array_interface(PyObject* obj) {
  throw std::runtime_error("PyTorch was compiled without NumPy support");
}

void warn_numpy_not_writeable() {
  throw std::runtime_error("PyTorch was compiled without NumPy support");
}

// No-op stubs.
void validate_numpy_for_dlpack_deleter_bug() {}

bool is_numpy_dlpack_deleter_bugged() {
  return false;
}
} // namespace torch::utils

#else

#include <torch/csrc/DynamicTypes.h>
#include <torch/csrc/Exceptions.h>
#include <torch/csrc/autograd/python_variable.h>
#include <torch/csrc/utils/object_ptr.h>

#include <ATen/ATen.h>
#include <ATen/TensorUtils.h>
#include <memory>
#include <sstream>
#include <stdexcept>

using namespace at;
using namespace torch::autograd;

namespace torch::utils {

bool is_numpy_available() {
  static bool available = []() {
    if (_import_array() >= 0) {
      return true;
    }
    // Try to get exception message, print warning and return false
    std::string message = "Failed to initialize NumPy";
    // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
    PyObject *type, *value, *traceback;
    PyErr_Fetch(&type, &value, &traceback);
    if (auto str = value ? PyObject_Str(value) : nullptr) {
      if (auto enc_str = PyUnicode_AsEncodedString(str, "utf-8", "strict")) {
        if (auto byte_str = PyBytes_AS_STRING(enc_str)) {
          message += ": " + std::string(byte_str);
        }
        Py_XDECREF(enc_str);
      }
      Py_XDECREF(str);
    }
    PyErr_Clear();
    TORCH_WARN(message);
    return false;
  }();
  return available;
}

static std::vector<npy_intp> to_numpy_shape(IntArrayRef x) {
  // shape and stride conversion from int64_t to npy_intp
  auto nelem = x.size();
  auto result = std::vector<npy_intp>(nelem);
  for (const auto i : c10::irange(nelem)) {
    result[i] = static_cast<npy_intp>(x[i]);
  }
  return result;
}

static std::vector<int64_t> to_aten_shape(int ndim, npy_intp* values) {
  // shape and stride conversion from npy_intp to int64_t
  auto result = std::vector<int64_t>(ndim);
  for (const auto i : c10::irange(ndim)) {
    result[i] = static_cast<int64_t>(values[i]);
  }
  return result;
}

static std::vector<int64_t> seq_to_aten_shape(PyObject* py_seq) {
  int ndim = PySequence_Length(py_seq);
  if (ndim == -1) {
    throw TypeError("shape and strides must be sequences");
  }
  auto result = std::vector<int64_t>(ndim);
  for (const auto i : c10::irange(ndim)) {
    auto item = THPObjectPtr(PySequence_GetItem(py_seq, i));
    if (!item)
      throw python_error();

    result[i] = PyLong_AsLongLong(item);
    if (result[i] == -1 && PyErr_Occurred())
      throw python_error();
  }
  return result;
}

PyObject* tensor_to_numpy(const at::Tensor& tensor, bool force /*=false*/) {
  TORCH_CHECK(is_numpy_available(), "Numpy is not available");

  TORCH_CHECK(
      !tensor.unsafeGetTensorImpl()->is_python_dispatch(),
      ".numpy() is not supported for tensor subclasses.");

  TORCH_CHECK_TYPE(
      tensor.layout() == Layout::Strided,
      "can't convert ",
      c10::str(tensor.layout()).c_str(),
      " layout tensor to numpy. ",
      "Use Tensor.to_dense() first.");

  if (!force) {
    TORCH_CHECK_TYPE(
        tensor.device().type() == DeviceType::CPU,
        "can't convert ",
        tensor.device().str().c_str(),
        " device type tensor to numpy. Use Tensor.cpu() to ",
        "copy the tensor to host memory first.");

    TORCH_CHECK(
        !(at::GradMode::is_enabled() && tensor.requires_grad()),
        "Can't call numpy() on Tensor that requires grad. "
        "Use tensor.detach().numpy() instead.");

    TORCH_CHECK(
        !tensor.is_conj(),
        "Can't call numpy() on Tensor that has conjugate bit set. ",
        "Use tensor.resolve_conj().numpy() instead.");

    TORCH_CHECK(
        !tensor.is_neg(),
        "Can't call numpy() on Tensor that has negative bit set. "
        "Use tensor.resolve_neg().numpy() instead.");
  }

  auto prepared_tensor = tensor.detach().cpu().resolve_conj().resolve_neg();

  auto dtype = aten_to_numpy_dtype(prepared_tensor.scalar_type());
  auto sizes = to_numpy_shape(prepared_tensor.sizes());
  auto strides = to_numpy_shape(prepared_tensor.strides());

  // NumPy strides use bytes. Torch strides use element counts.
  auto element_size_in_bytes = prepared_tensor.element_size();
  for (auto& stride : strides) {
    stride *= element_size_in_bytes;
  }
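  // Illustrative example: a contiguous float32 tensor of shape (2, 3) has
  // Torch strides {3, 1}; with a 4-byte element size the NumPy strides
  // become {12, 4}.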

  auto array = THPObjectPtr(PyArray_New(
      &PyArray_Type,
      static_cast<int>(prepared_tensor.dim()),
      sizes.data(),
      dtype,
      strides.data(),
      prepared_tensor.data_ptr(),
      0,
      NPY_ARRAY_ALIGNED | NPY_ARRAY_WRITEABLE,
      nullptr));
  if (!array)
    return nullptr;

  // TODO: This attempts to keep the underlying memory alive by setting the base
  // object of the ndarray to the tensor and disabling resizes on the storage.
  // This is not sufficient. For example, the tensor's storage may be changed
  // via Tensor.set_, which can free the underlying memory.
  PyObject* py_tensor = THPVariable_Wrap(prepared_tensor);
  if (!py_tensor)
    throw python_error();
  if (PyArray_SetBaseObject((PyArrayObject*)array.get(), py_tensor) == -1) {
    return nullptr;
  }
  // Use the private storage API
  prepared_tensor.storage().unsafeGetStorageImpl()->set_resizable(false);

  return array.release();
}

void warn_numpy_not_writeable() {
  TORCH_WARN_ONCE(
      "The given NumPy array is not writable, and PyTorch does "
      "not support non-writable tensors. This means writing to this tensor "
      "will result in undefined behavior. "
      "You may want to copy the array to protect its data or make it writable "
      "before converting it to a tensor. This type of warning will be "
      "suppressed for the rest of this program.");
}

at::Tensor tensor_from_numpy(
    PyObject* obj,
    bool warn_if_not_writeable /*=true*/) {
  if (!is_numpy_available()) {
    throw std::runtime_error("Numpy is not available");
  }
  TORCH_CHECK_TYPE(
      PyArray_Check(obj),
      "expected np.ndarray (got ",
      Py_TYPE(obj)->tp_name,
      ")");
  auto array = (PyArrayObject*)obj;

  // warn_if_not_writeable is false when the caller is going to copy the
  // numpy array anyway, so the warning is suppressed in that case.
  if (!PyArray_ISWRITEABLE(array) && warn_if_not_writeable) {
    warn_numpy_not_writeable();
  }

  int ndim = PyArray_NDIM(array);
  auto sizes = to_aten_shape(ndim, PyArray_DIMS(array));
  auto strides = to_aten_shape(ndim, PyArray_STRIDES(array));
  // NumPy strides use bytes. Torch strides use element counts.
  auto element_size_in_bytes = PyArray_ITEMSIZE(array);
  for (auto& stride : strides) {
    TORCH_CHECK_VALUE(
        stride % element_size_in_bytes == 0,
        "given numpy array strides not a multiple of the element byte size. "
        "Copy the numpy array to reallocate the memory.");
    stride /= element_size_in_bytes;
  }
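  // Illustrative example: a float64 array with byte strides {24, 8} and an
  // 8-byte itemsize maps to Torch strides {3, 1}; a byte stride that is not a
  // multiple of the itemsize (e.g. a field view into a structured array) is
  // rejected above.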

  for (const auto i : c10::irange(ndim)) {
    TORCH_CHECK_VALUE(
        strides[i] >= 0,
        "At least one stride in the given numpy array is negative, "
        "and tensors with negative strides are not currently supported. "
        "(You can probably work around this by making a copy of your array "
        " with array.copy().) ");
  }

  void* data_ptr = PyArray_DATA(array);
  TORCH_CHECK_VALUE(
      PyArray_EquivByteorders(PyArray_DESCR(array)->byteorder, NPY_NATIVE),
      "given numpy array has byte order different from the native byte order. "
      "Conversion between byte orders is currently not supported.");
  // This has to go before the INCREF in case the dtype mapping doesn't
  // exist and an exception is thrown
  auto torch_dtype = numpy_dtype_to_aten(PyArray_TYPE(array));
  Py_INCREF(obj);
  return at::lift_fresh(at::from_blob(
      data_ptr,
      sizes,
      strides,
      [obj](void* data) {
        pybind11::gil_scoped_acquire gil;
        Py_DECREF(obj);
      },
      at::device(kCPU).dtype(torch_dtype)));
}

int aten_to_numpy_dtype(const ScalarType scalar_type) {
  switch (scalar_type) {
    case kDouble:
      return NPY_DOUBLE;
    case kFloat:
      return NPY_FLOAT;
    case kHalf:
      return NPY_HALF;
    case kComplexDouble:
      return NPY_COMPLEX128;
    case kComplexFloat:
      return NPY_COMPLEX64;
    case kLong:
      return NPY_INT64;
    case kInt:
      return NPY_INT32;
    case kShort:
      return NPY_INT16;
    case kChar:
      return NPY_INT8;
    case kByte:
      return NPY_UINT8;
    case kUInt16:
      return NPY_UINT16;
    case kUInt32:
      return NPY_UINT32;
    case kUInt64:
      return NPY_UINT64;
    case kBool:
      return NPY_BOOL;
    default:
      throw TypeError("Got unsupported ScalarType %s", toString(scalar_type));
  }
}

ScalarType numpy_dtype_to_aten(int dtype) {
  switch (dtype) {
    case NPY_DOUBLE:
      return kDouble;
    case NPY_FLOAT:
      return kFloat;
    case NPY_HALF:
      return kHalf;
    case NPY_COMPLEX64:
      return kComplexFloat;
    case NPY_COMPLEX128:
      return kComplexDouble;
    case NPY_INT16:
      return kShort;
    case NPY_INT8:
      return kChar;
    case NPY_UINT8:
      return kByte;
    case NPY_UINT16:
      return kUInt16;
    case NPY_UINT32:
      return kUInt32;
    case NPY_UINT64:
      return kUInt64;
    case NPY_BOOL:
      return kBool;
    default:
      // Workaround: MSVC does not support two switch cases that have the same
      // value
      if (dtype == NPY_INT || dtype == NPY_INT32) {
        // To cover all cases we must use NPY_INT because
        // NPY_INT32 is an alias which may be equal to:
        // - NPY_INT, when sizeof(int) = 4 and sizeof(long) = 8
        // - NPY_LONG, when sizeof(int) = 4 and sizeof(long) = 4
        return kInt;
      } else if (dtype == NPY_LONGLONG || dtype == NPY_INT64) {
        // NPY_INT64 is an alias which may be equal to:
        // - NPY_LONG, when sizeof(long) = 8 and sizeof(long long) = 8
        // - NPY_LONGLONG, when sizeof(long) = 4 and sizeof(long long) = 8
        return kLong;
      } else {
        break; // unsupported dtype; fall through to the error below
      }
  }
  auto pytype = THPObjectPtr(PyArray_TypeObjectFromType(dtype));
  if (!pytype)
    throw python_error();
  throw TypeError(
      "can't convert np.ndarray of type %s. The only supported types are: "
      "float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint64, uint32, uint16, uint8, and bool.",
      ((PyTypeObject*)pytype.get())->tp_name);
}

bool is_numpy_int(PyObject* obj) {
  return is_numpy_available() && PyArray_IsScalar((obj), Integer);
}

bool is_numpy_bool(PyObject* obj) {
  return is_numpy_available() && PyArray_IsScalar((obj), Bool);
}

bool is_numpy_scalar(PyObject* obj) {
  return is_numpy_available() &&
      (is_numpy_int(obj) || PyArray_IsScalar(obj, Bool) ||
       PyArray_IsScalar(obj, Floating) ||
       PyArray_IsScalar(obj, ComplexFloating));
}

at::Tensor tensor_from_cuda_array_interface(PyObject* obj) {
  if (!is_numpy_available()) {
    throw std::runtime_error("Numpy is not available");
  }
  auto cuda_dict =
      THPObjectPtr(PyObject_GetAttrString(obj, "__cuda_array_interface__"));
  TORCH_INTERNAL_ASSERT(cuda_dict);

  if (!PyDict_Check(cuda_dict.get())) {
    throw TypeError("`__cuda_array_interface__` must be a dict");
  }

  // Extract the `obj.__cuda_array_interface__['shape']` attribute
  std::vector<int64_t> sizes;
  {
    PyObject* py_shape = PyDict_GetItemString(cuda_dict, "shape");
    if (py_shape == nullptr) {
      throw TypeError("attribute `shape` must exist");
    }
    sizes = seq_to_aten_shape(py_shape);
  }

  // Extract the `obj.__cuda_array_interface__['typestr']` attribute
  // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
  ScalarType dtype;
  // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
  int dtype_size_in_bytes;
  {
    PyObject* py_typestr = PyDict_GetItemString(cuda_dict, "typestr");
    if (py_typestr == nullptr) {
      throw TypeError("attribute `typestr` must exist");
    }
    // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
    PyArray_Descr* descr;
    TORCH_CHECK_VALUE(
        PyArray_DescrConverter(py_typestr, &descr), "cannot parse `typestr`");
    dtype = numpy_dtype_to_aten(descr->type_num);
#if NPY_ABI_VERSION >= 0x02000000
    dtype_size_in_bytes = PyDataType_ELSIZE(descr);
#else
    dtype_size_in_bytes = descr->elsize;
#endif
    TORCH_INTERNAL_ASSERT(dtype_size_in_bytes > 0);
  }

  // Extract the `obj.__cuda_array_interface__['data']` attribute
  // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
  void* data_ptr;
  {
    PyObject* py_data = PyDict_GetItemString(cuda_dict, "data");
    if (py_data == nullptr) {
      throw TypeError("attribute `data` must exist");
    }
    if (!PyTuple_Check(py_data) || PyTuple_GET_SIZE(py_data) != 2) {
      throw TypeError("`data` must be a 2-tuple of (int, bool)");
    }
    data_ptr = PyLong_AsVoidPtr(PyTuple_GET_ITEM(py_data, 0));
    if (data_ptr == nullptr && PyErr_Occurred()) {
      throw python_error();
    }
    int read_only = PyObject_IsTrue(PyTuple_GET_ITEM(py_data, 1));
    if (read_only == -1) {
      throw python_error();
    }
    if (read_only) {
      throw TypeError(
          "the read only flag is not supported, should always be False");
    }
  }

  // Extract the `obj.__cuda_array_interface__['strides']` attribute
  std::vector<int64_t> strides;
  {
    PyObject* py_strides = PyDict_GetItemString(cuda_dict, "strides");
    if (py_strides != nullptr && py_strides != Py_None) {
      if (PySequence_Length(py_strides) == -1 ||
          static_cast<size_t>(PySequence_Length(py_strides)) != sizes.size()) {
        throw TypeError(
            "strides must be a sequence of the same length as shape");
      }
      strides = seq_to_aten_shape(py_strides);

      // __cuda_array_interface__ strides use bytes. Torch strides use element
      // counts.
      for (auto& stride : strides) {
        TORCH_CHECK_VALUE(
            stride % dtype_size_in_bytes == 0,
            "given array strides not a multiple of the element byte size. "
            "Make a copy of the array to reallocate the memory.");
        stride /= dtype_size_in_bytes;
      }
    } else {
      strides = at::detail::defaultStrides(sizes);
    }
  }

  const auto target_device = [&]() -> std::optional<Device> {
    // note(crcrpar): zero-size arrays come with nullptr.
    // ref:
    // https://numba.readthedocs.io/en/stable/cuda/cuda_array_interface.html#cuda-array-interface-version-3
    if (data_ptr != nullptr) {
      return {};
    } else {
      const auto current_device = at::detail::getCUDAHooks().current_device();
      return Device(
          kCUDA,
          static_cast<DeviceIndex>(current_device > -1 ? current_device : 0));
    }
  }();

  Py_INCREF(obj);
  return at::from_blob(
      data_ptr,
      sizes,
      strides,
      [obj](void* data) {
        pybind11::gil_scoped_acquire gil;
        Py_DECREF(obj);
      },
      at::device(kCUDA).dtype(dtype),
      target_device);
}

// Mutated only once (during module init); behaves as an immutable variable
// thereafter.
bool numpy_with_dlpack_deleter_bug_installed = false;

// NumPy implemented support for DLPack capsules in version 1.22.0. However,
// the initial implementation did not correctly handle the invocation of
// `DLManagedTensor::deleter` in a no-GIL context. Until PyTorch 1.13.0, we
// were implicitly holding the GIL when the deleter was invoked, but this
// incurred a significant performance overhead when mem-unmapping large
// tensors. Starting with PyTorch 1.13.0, we release the GIL in
// `THPVariable_clear` just before deallocation, but this triggers the
// aforementioned bug in NumPy.
//
// The NumPy bug should be fixed in version 1.24.0, but all releases
// between 1.22.0 and 1.23.5 result in internal assertion failures that
// consequently lead to segfaults. To work around this, we need to selectively
// disable the optimization whenever we detect a buggy NumPy installation.
// We would ideally restrict the "fix" just to DLPack-backed tensors that stem
// from NumPy, but given that it is difficult to confidently detect the
// provenance of such tensors, we have to resort to a more general approach.
//
// References:
// https://github.com/pytorch/pytorch/issues/88082
// https://github.com/pytorch/pytorch/issues/77139
// https://github.com/numpy/numpy/issues/22507
void validate_numpy_for_dlpack_deleter_bug() {
  // Ensure that we don't call this more than once per session.
  static bool validated = false;
  TORCH_INTERNAL_ASSERT(validated == false);
  validated = true;

  THPObjectPtr numpy_module(PyImport_ImportModule("numpy"));
  if (!numpy_module) {
    PyErr_Clear();
    return;
  }

  THPObjectPtr version_attr(
      PyObject_GetAttrString(numpy_module.get(), "__version__"));
  if (!version_attr) {
    PyErr_Clear();
    return;
  }

  Py_ssize_t version_utf8_size = 0;
  const char* version_utf8 =
      PyUnicode_AsUTF8AndSize(version_attr.get(), &version_utf8_size);
  if (!version_utf8_size) {
    PyErr_Clear();
    return;
  }
  std::string version(version_utf8, version_utf8_size);
  if (version_utf8_size < 4)
    return;
  std::string truncated_version(version.substr(0, 4));
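  // Illustrative examples of the prefix match below: "1.22.4" and "1.23.5"
  // are flagged as bugged, while "1.21.6", "1.24.0", and "2.0.1" are not.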
  numpy_with_dlpack_deleter_bug_installed =
      truncated_version == "1.22" || truncated_version == "1.23";
}

bool is_numpy_dlpack_deleter_bugged() {
  return numpy_with_dlpack_deleter_bug_installed;
}
} // namespace torch::utils

#endif // USE_NUMPY