#include <torch/csrc/THP.h>
#include <torch/csrc/utils/tensor_numpy.h>
#define WITH_NUMPY_IMPORT_ARRAY
#include <c10/util/irange.h>
#include <torch/csrc/utils/numpy_stub.h>

#ifndef USE_NUMPY

namespace torch::utils {
PyObject* tensor_to_numpy(const at::Tensor&, bool) {
  throw std::runtime_error("PyTorch was compiled without NumPy support");
}
at::Tensor tensor_from_numpy(
    PyObject* obj,
    bool warn_if_not_writeable /*=true*/) {
  throw std::runtime_error("PyTorch was compiled without NumPy support");
}

bool is_numpy_available() {
  throw std::runtime_error("PyTorch was compiled without NumPy support");
}

bool is_numpy_int(PyObject* obj) {
  throw std::runtime_error("PyTorch was compiled without NumPy support");
}
bool is_numpy_scalar(PyObject* obj) {
  throw std::runtime_error("PyTorch was compiled without NumPy support");
}
at::Tensor tensor_from_cuda_array_interface(PyObject* obj) {
  throw std::runtime_error("PyTorch was compiled without NumPy support");
}

void warn_numpy_not_writeable() {
  throw std::runtime_error("PyTorch was compiled without NumPy support");
}

// No-op stubs.
void validate_numpy_for_dlpack_deleter_bug() {}

bool is_numpy_dlpack_deleter_bugged() {
  return false;
}
} // namespace torch::utils

#else

#include <torch/csrc/DynamicTypes.h>
#include <torch/csrc/Exceptions.h>
#include <torch/csrc/autograd/python_variable.h>
#include <torch/csrc/utils/object_ptr.h>

#include <ATen/ATen.h>
#include <ATen/TensorUtils.h>
#include <memory>
#include <sstream>
#include <stdexcept>

using namespace at;
using namespace torch::autograd;

namespace torch::utils {

bool is_numpy_available() {
  static bool available = []() {
    if (_import_array() >= 0) {
      return true;
    }
    // Import failed: fetch the pending exception message, emit a warning,
    // and report NumPy as unavailable.
    std::string message = "Failed to initialize NumPy";
    PyObject* type = nullptr;
    PyObject* value = nullptr;
    PyObject* traceback = nullptr;
    PyErr_Fetch(&type, &value, &traceback);
    if (auto str = value ? PyObject_Str(value) : nullptr) {
      if (auto enc_str = PyUnicode_AsEncodedString(str, "utf-8", "strict")) {
        if (auto byte_str = PyBytes_AS_STRING(enc_str)) {
          message += ": " + std::string(byte_str);
        }
        Py_XDECREF(enc_str);
      }
      Py_XDECREF(str);
    }
    // Release the exception objects owned since PyErr_Fetch; PyErr_Clear
    // discards any error raised while stringifying the message.
    Py_XDECREF(type);
    Py_XDECREF(value);
    Py_XDECREF(traceback);
    PyErr_Clear();
    TORCH_WARN(message);
    return false;
  }();
  return available;
}
static std::vector<npy_intp> to_numpy_shape(IntArrayRef x) {
  // Shape/stride conversion from int64_t to npy_intp.
  auto nelem = x.size();
  auto result = std::vector<npy_intp>(nelem);
  for (const auto i : c10::irange(nelem)) {
    result[i] = static_cast<npy_intp>(x[i]);
  }
  return result;
}

static std::vector<int64_t> to_aten_shape(int ndim, npy_intp* values) {
  // Shape/stride conversion from npy_intp to int64_t.
  auto result = std::vector<int64_t>(ndim);
  for (const auto i : c10::irange(ndim)) {
    result[i] = static_cast<int64_t>(values[i]);
  }
  return result;
}

static std::vector<int64_t> seq_to_aten_shape(PyObject* py_seq) {
  Py_ssize_t ndim = PySequence_Length(py_seq);
  if (ndim == -1) {
    throw TypeError("shape and strides must be sequences");
  }
  auto result = std::vector<int64_t>(ndim);
  for (const auto i : c10::irange(ndim)) {
    auto item = THPObjectPtr(PySequence_GetItem(py_seq, i));
    if (!item)
      throw python_error();

    result[i] = PyLong_AsLongLong(item);
    if (result[i] == -1 && PyErr_Occurred())
      throw python_error();
  }
  return result;
}
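
// Worked example (illustrative, values hypothetical): applied to the Python
// tuple (2, 3), seq_to_aten_shape returns std::vector<int64_t>{2, 3}; a
// non-sequence argument raises TypeError, and a non-integer element surfaces
// the pending Python error as python_error().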

PyObject* tensor_to_numpy(const at::Tensor& tensor, bool force /*=false*/) {
  TORCH_CHECK(is_numpy_available(), "NumPy is not available");

  TORCH_CHECK(
      !tensor.unsafeGetTensorImpl()->is_python_dispatch(),
      ".numpy() is not supported for tensor subclasses.");

  TORCH_CHECK_TYPE(
      tensor.layout() == Layout::Strided,
      "can't convert ",
      c10::str(tensor.layout()).c_str(),
      " layout tensor to numpy. ",
      "Use Tensor.to_dense() first.");

  if (!force) {
    TORCH_CHECK_TYPE(
        tensor.device().type() == DeviceType::CPU,
        "can't convert ",
        tensor.device().str().c_str(),
        " device type tensor to numpy. Use Tensor.cpu() to ",
        "copy the tensor to host memory first.");

    TORCH_CHECK(
        !(at::GradMode::is_enabled() && tensor.requires_grad()),
        "Can't call numpy() on Tensor that requires grad. "
        "Use tensor.detach().numpy() instead.");

    TORCH_CHECK(
        !tensor.is_conj(),
        "Can't call numpy() on Tensor that has conjugate bit set. ",
        "Use tensor.resolve_conj().numpy() instead.");

    TORCH_CHECK(
        !tensor.is_neg(),
        "Can't call numpy() on Tensor that has negative bit set. "
        "Use tensor.resolve_neg().numpy() instead.");
  }

  auto prepared_tensor = tensor.detach().cpu().resolve_conj().resolve_neg();

  auto dtype = aten_to_numpy_dtype(prepared_tensor.scalar_type());
  auto sizes = to_numpy_shape(prepared_tensor.sizes());
  auto strides = to_numpy_shape(prepared_tensor.strides());

  // NumPy strides are in bytes; Torch strides are in element counts.
  auto element_size_in_bytes = prepared_tensor.element_size();
  for (auto& stride : strides) {
    stride *= element_size_in_bytes;
  }
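  // Worked example (illustrative): a contiguous float32 tensor of shape
  // (2, 3) has Torch strides (3, 1); with a 4-byte element size the
  // resulting NumPy strides are (12, 4) bytes.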

  auto array = THPObjectPtr(PyArray_New(
      &PyArray_Type,
      static_cast<int>(prepared_tensor.dim()),
      sizes.data(),
      dtype,
      strides.data(),
      prepared_tensor.data_ptr(),
      0,
      NPY_ARRAY_ALIGNED | NPY_ARRAY_WRITEABLE,
      nullptr));
  if (!array)
    return nullptr;

  // TODO: This attempts to keep the underlying memory alive by setting the
  // base object of the ndarray to the tensor and disabling resizes on the
  // storage. This is not sufficient: for example, the tensor's storage may
  // still be changed via Tensor.set_, which can free the underlying memory.
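  // Illustrative failure mode (sketch): if the base tensor's storage is
  // later swapped out from the Python side (e.g. `array.base.set_(other)`),
  // the old storage can be freed while the ndarray still points at it.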
  PyObject* py_tensor = THPVariable_Wrap(prepared_tensor);
  if (!py_tensor)
    throw python_error();
  if (PyArray_SetBaseObject((PyArrayObject*)array.get(), py_tensor) == -1) {
    return nullptr;
  }
  // Use the private storage API to forbid resizing.
  prepared_tensor.storage().unsafeGetStorageImpl()->set_resizable(false);

  return array.release();
}
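
// Illustrative usage sketch (assumes the GIL is held, as for any CPython API
// call; error handling elided):
//
//   at::Tensor t = at::arange(6, at::dtype(at::kFloat)).reshape({2, 3});
//   PyObject* arr = torch::utils::tensor_to_numpy(t);  // zero-copy view
//   // `arr` shares t's memory; its base object keeps the storage alive.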

void warn_numpy_not_writeable() {
  TORCH_WARN_ONCE(
      "The given NumPy array is not writable, and PyTorch does "
      "not support non-writable tensors. This means writing to this tensor "
      "will result in undefined behavior. "
      "You may want to copy the array to protect its data or make it writable "
      "before converting it to a tensor. This type of warning will be "
      "suppressed for the rest of this program.");
}

at::Tensor tensor_from_numpy(
    PyObject* obj,
    bool warn_if_not_writeable /*=true*/) {
  if (!is_numpy_available()) {
    throw std::runtime_error("NumPy is not available");
  }
  TORCH_CHECK_TYPE(
      PyArray_Check(obj),
      "expected np.ndarray (got ",
      Py_TYPE(obj)->tp_name,
      ")");
  auto array = (PyArrayObject*)obj;

  // warn_if_not_writeable defaults to true; callers pass false to suppress
  // the warning when they are about to copy the array anyway, since writes
  // to the copy cannot affect the original.
  if (!PyArray_ISWRITEABLE(array) && warn_if_not_writeable) {
    warn_numpy_not_writeable();
  }

  int ndim = PyArray_NDIM(array);
  auto sizes = to_aten_shape(ndim, PyArray_DIMS(array));
  auto strides = to_aten_shape(ndim, PyArray_STRIDES(array));
  // NumPy strides are in bytes; Torch strides are in element counts.
  auto element_size_in_bytes = PyArray_ITEMSIZE(array);
  for (auto& stride : strides) {
    TORCH_CHECK_VALUE(
        stride % element_size_in_bytes == 0,
        "given numpy array strides not a multiple of the element byte size. "
        "Copy the numpy array to reallocate the memory.");
    stride /= element_size_in_bytes;
  }
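  // Worked example (illustrative): a C-contiguous float64 array of shape
  // (2, 3) reports NumPy strides (24, 8) bytes; with an 8-byte item size
  // this yields Torch strides (3, 1) elements.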

  for (const auto i : c10::irange(ndim)) {
    TORCH_CHECK_VALUE(
        strides[i] >= 0,
        "At least one stride in the given numpy array is negative, "
        "and tensors with negative strides are not currently supported. "
        "(You can probably work around this by making a copy of your array "
        "with array.copy().)");
  }

  void* data_ptr = PyArray_DATA(array);
  TORCH_CHECK_VALUE(
      PyArray_EquivByteorders(PyArray_DESCR(array)->byteorder, NPY_NATIVE),
      "given numpy array has byte order different from the native byte order. "
      "Conversion between byte orders is currently not supported.");
  // This lookup has to happen before the INCREF in case the dtype mapping
  // doesn't exist and an exception is thrown.
  auto torch_dtype = numpy_dtype_to_aten(PyArray_TYPE(array));
  Py_INCREF(obj);
  return at::lift_fresh(at::from_blob(
      data_ptr,
      sizes,
      strides,
      [obj](void* data) {
        pybind11::gil_scoped_acquire gil;
        Py_DECREF(obj);
      },
      at::device(kCPU).dtype(torch_dtype)));
}
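
// Illustrative lifetime sketch: the returned tensor aliases the array's
// memory, and the from_blob deleter drops the extra reference (under the
// GIL) once the tensor's storage is released:
//
//   PyObject* arr = ...;  // some np.ndarray
//   at::Tensor t = torch::utils::tensor_from_numpy(arr);  // zero-copy
//   // Py_REFCNT(arr) was incremented above; it is decremented only when
//   // the last tensor sharing this storage goes away.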

int aten_to_numpy_dtype(const ScalarType scalar_type) {
  switch (scalar_type) {
    case kDouble:
      return NPY_DOUBLE;
    case kFloat:
      return NPY_FLOAT;
    case kHalf:
      return NPY_HALF;
    case kComplexDouble:
      return NPY_COMPLEX128;
    case kComplexFloat:
      return NPY_COMPLEX64;
    case kLong:
      return NPY_INT64;
    case kInt:
      return NPY_INT32;
    case kShort:
      return NPY_INT16;
    case kChar:
      return NPY_INT8;
    case kByte:
      return NPY_UINT8;
    case kUInt16:
      return NPY_UINT16;
    case kUInt32:
      return NPY_UINT32;
    case kUInt64:
      return NPY_UINT64;
    case kBool:
      return NPY_BOOL;
    default:
      throw TypeError("Got unsupported ScalarType %s", toString(scalar_type));
  }
}

ScalarType numpy_dtype_to_aten(int dtype) {
  switch (dtype) {
    case NPY_DOUBLE:
      return kDouble;
    case NPY_FLOAT:
      return kFloat;
    case NPY_HALF:
      return kHalf;
    case NPY_COMPLEX64:
      return kComplexFloat;
    case NPY_COMPLEX128:
      return kComplexDouble;
    case NPY_INT16:
      return kShort;
    case NPY_INT8:
      return kChar;
    case NPY_UINT8:
      return kByte;
    case NPY_UINT16:
      return kUInt16;
    case NPY_UINT32:
      return kUInt32;
    case NPY_UINT64:
      return kUInt64;
    case NPY_BOOL:
      return kBool;
    default:
      // Workaround: MSVC does not support two switch cases that have the
      // same value.
      if (dtype == NPY_INT || dtype == NPY_INT32) {
        // To cover all cases we must test NPY_INT as well, because NPY_INT32
        // is an alias which may be equal to:
        // - NPY_INT, when sizeof(int) == 4 and sizeof(long) == 8
        // - NPY_LONG, when sizeof(int) == 4 and sizeof(long) == 4
        return kInt;
      } else if (dtype == NPY_LONGLONG || dtype == NPY_INT64) {
        // NPY_INT64 is an alias which may be equal to:
        // - NPY_LONG, when sizeof(long) == 8 and sizeof(long long) == 8
        // - NPY_LONGLONG, when sizeof(long) == 4 and sizeof(long long) == 8
        return kLong;
      } else {
        break; // break as if this were one of the cases above, because this
               // is only a workaround
      }
  }
  auto pytype = THPObjectPtr(PyArray_TypeObjectFromType(dtype));
  if (!pytype)
    throw python_error();
  throw TypeError(
      "can't convert np.ndarray of type %s. The only supported types are: "
      "float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint64, uint32, uint16, uint8, and bool.",
      ((PyTypeObject*)pytype.get())->tp_name);
}
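
// Round-trip sanity sketch (illustrative): for every ScalarType S handled
// above, numpy_dtype_to_aten(aten_to_numpy_dtype(S)) == S; e.g.
// aten_to_numpy_dtype(kFloat) is NPY_FLOAT, which maps back to kFloat.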

bool is_numpy_int(PyObject* obj) {
  return is_numpy_available() && PyArray_IsScalar((obj), Integer);
}

bool is_numpy_bool(PyObject* obj) {
  return is_numpy_available() && PyArray_IsScalar((obj), Bool);
}

bool is_numpy_scalar(PyObject* obj) {
  return is_numpy_available() &&
      (is_numpy_int(obj) || PyArray_IsScalar(obj, Bool) ||
       PyArray_IsScalar(obj, Floating) ||
       PyArray_IsScalar(obj, ComplexFloating));
}

at::Tensor tensor_from_cuda_array_interface(PyObject* obj) {
  if (!is_numpy_available()) {
    throw std::runtime_error("NumPy is not available");
  }
  auto cuda_dict =
      THPObjectPtr(PyObject_GetAttrString(obj, "__cuda_array_interface__"));
  TORCH_INTERNAL_ASSERT(cuda_dict);

  if (!PyDict_Check(cuda_dict.get())) {
    throw TypeError("`__cuda_array_interface__` must be a dict");
  }
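
  // For reference, a minimal interface dict looks like this (illustrative,
  // values hypothetical):
  //   { 'shape': (2, 3), 'typestr': '<f4',
  //     'data': (140229834817536, False), 'strides': None, 'version': 2 }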

  // Extract the `obj.__cuda_array_interface__['shape']` attribute
  std::vector<int64_t> sizes;
  {
    PyObject* py_shape = PyDict_GetItemString(cuda_dict, "shape");
    if (py_shape == nullptr) {
      throw TypeError("attribute `shape` must exist");
    }
    sizes = seq_to_aten_shape(py_shape);
  }

  // Extract the `obj.__cuda_array_interface__['typestr']` attribute
  // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
  ScalarType dtype;
  // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
  int dtype_size_in_bytes;
  {
    PyObject* py_typestr = PyDict_GetItemString(cuda_dict, "typestr");
    if (py_typestr == nullptr) {
      throw TypeError("attribute `typestr` must exist");
    }
    // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
    PyArray_Descr* descr;
    TORCH_CHECK_VALUE(
        PyArray_DescrConverter(py_typestr, &descr), "cannot parse `typestr`");
    dtype = numpy_dtype_to_aten(descr->type_num);
#if NPY_ABI_VERSION >= 0x02000000
    dtype_size_in_bytes = PyDataType_ELSIZE(descr);
#else
    dtype_size_in_bytes = descr->elsize;
#endif
    TORCH_INTERNAL_ASSERT(dtype_size_in_bytes > 0);
  }

  // Extract the `obj.__cuda_array_interface__['data']` attribute
  // NOLINTNEXTLINE(cppcoreguidelines-init-variables)
  void* data_ptr;
  {
    PyObject* py_data = PyDict_GetItemString(cuda_dict, "data");
    if (py_data == nullptr) {
      throw TypeError("attribute `data` must exist");
    }
    if (!PyTuple_Check(py_data) || PyTuple_GET_SIZE(py_data) != 2) {
      throw TypeError("`data` must be a 2-tuple of (int, bool)");
    }
    data_ptr = PyLong_AsVoidPtr(PyTuple_GET_ITEM(py_data, 0));
    if (data_ptr == nullptr && PyErr_Occurred()) {
      throw python_error();
    }
    int read_only = PyObject_IsTrue(PyTuple_GET_ITEM(py_data, 1));
    if (read_only == -1) {
      throw python_error();
    }
    if (read_only) {
      throw TypeError(
          "the read only flag is not supported, should always be False");
    }
  }

  // Extract the `obj.__cuda_array_interface__['strides']` attribute
  std::vector<int64_t> strides;
  {
    PyObject* py_strides = PyDict_GetItemString(cuda_dict, "strides");
    if (py_strides != nullptr && py_strides != Py_None) {
      if (PySequence_Length(py_strides) == -1 ||
          static_cast<size_t>(PySequence_Length(py_strides)) != sizes.size()) {
        throw TypeError(
            "strides must be a sequence of the same length as shape");
      }
      strides = seq_to_aten_shape(py_strides);

      // __cuda_array_interface__ strides are in bytes; Torch strides are in
      // element counts.
      for (auto& stride : strides) {
        TORCH_CHECK_VALUE(
            stride % dtype_size_in_bytes == 0,
            "given array strides not a multiple of the element byte size. "
            "Make a copy of the array to reallocate the memory.");
        stride /= dtype_size_in_bytes;
      }
    } else {
      strides = at::detail::defaultStrides(sizes);
    }
  }

  const auto target_device = [&]() -> std::optional<Device> {
    // note(crcrpar): zero-size arrays come with a null data pointer.
    // ref:
    // https://numba.readthedocs.io/en/stable/cuda/cuda_array_interface.html#cuda-array-interface-version-3
    if (data_ptr != nullptr) {
      return {};
    } else {
      const auto current_device = at::detail::getCUDAHooks().current_device();
      return Device(
          kCUDA,
          static_cast<DeviceIndex>(current_device > -1 ? current_device : 0));
    }
  }();

  Py_INCREF(obj);
  return at::from_blob(
      data_ptr,
      sizes,
      strides,
      [obj](void* data) {
        pybind11::gil_scoped_acquire gil;
        Py_DECREF(obj);
      },
      at::device(kCUDA).dtype(dtype),
      target_device);
}
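
// Illustrative usage sketch: any Python object exposing
// `__cuda_array_interface__` (e.g. a CuPy or Numba device array) can be
// wrapped zero-copy; GIL assumed held, error handling elided:
//
//   PyObject* cupy_array = ...;  // exposes __cuda_array_interface__
//   at::Tensor t =
//       torch::utils::tensor_from_cuda_array_interface(cupy_array);
//   // t aliases the device memory; a reference to cupy_array is held until
//   // the tensor's storage is released.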

// Mutated only once (during module init); behaves as an immutable variable
// thereafter.
bool numpy_with_dlpack_deleter_bug_installed = false;

// NumPy implemented support for DLPack capsules in version 1.22.0, but the
// initial implementation did not correctly handle the invocation of
// `DLManagedTensor::deleter` in a no-GIL context. Until PyTorch 1.13.0, we
// were implicitly holding the GIL when the deleter was invoked, but this
// incurred a significant performance overhead when mem-unmapping large
// tensors. Starting with PyTorch 1.13.0, we release the GIL in
// `THPVariable_clear` just before deallocation, but this triggers the
// aforementioned bug in NumPy.
//
// The NumPy bug should be fixed in version 1.24.0, but all releases between
// 1.22.0 and 1.23.5 result in internal assertion failures that consequently
// lead to segfaults. To work around this, we selectively disable the
// optimization whenever we detect a buggy NumPy installation. We would
// ideally restrict the workaround to DLPack-backed tensors that stem from
// NumPy, but since it is difficult to confidently detect the provenance of
// such tensors, we resort to this more general approach.
//
// References:
//  https://github.com/pytorch/pytorch/issues/88082
//  https://github.com/pytorch/pytorch/issues/77139
//  https://github.com/numpy/numpy/issues/22507
void validate_numpy_for_dlpack_deleter_bug() {
  // Ensure that we don't call this more than once per session.
  static bool validated = false;
  TORCH_INTERNAL_ASSERT(validated == false);
  validated = true;

  THPObjectPtr numpy_module(PyImport_ImportModule("numpy"));
  if (!numpy_module) {
    PyErr_Clear();
    return;
  }

  THPObjectPtr version_attr(
      PyObject_GetAttrString(numpy_module.get(), "__version__"));
  if (!version_attr) {
    PyErr_Clear();
    return;
  }

  Py_ssize_t version_utf8_size = 0;
  const char* version_utf8 =
      PyUnicode_AsUTF8AndSize(version_attr.get(), &version_utf8_size);
  if (!version_utf8 || version_utf8_size == 0) {
    PyErr_Clear();
    return;
  }
  std::string version(version_utf8, version_utf8_size);
  if (version_utf8_size < 4)
    return;
  std::string truncated_version(version.substr(0, 4));
  numpy_with_dlpack_deleter_bug_installed =
      truncated_version == "1.22" || truncated_version == "1.23";
}
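
// Worked example (illustrative): version strings "1.22.4" and "1.23.0rc1"
// truncate to "1.22" and "1.23" and set the flag; "1.21.6" and "1.24.0" do
// not. Only the first four characters are compared.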

bool is_numpy_dlpack_deleter_bugged() {
  return numpy_with_dlpack_deleter_bug_installed;
}
} // namespace torch::utils

#endif // USE_NUMPY