// aten/src/ATen/EmptyTensor.cpp
#define TORCH_ASSERT_NO_OPERATORS
#include <ATen/EmptyTensor.h>
#include <ATen/detail/CUDAHooksInterface.h>
#include <ATen/detail/XPUHooksInterface.h>
#include <ATen/Context.h>
#include <ATen/detail/PrivateUse1HooksInterface.h>
#include <c10/core/CPUAllocator.h>
#include <c10/util/safe_numerics.h>

#include <limits>

namespace at::detail {
namespace {
c10::Allocator* GetCPUAllocatorMaybePinned(bool pin_memory) {
  if (pin_memory) {
    // NB: This is not quite right: if you somehow had both CUDA and PrivateUse1 initialized
    // in the same PyTorch build, you would ONLY ever get the CUDA pinned memory allocator.
    // To properly support this, see https://github.com/pytorch/pytorch/issues/14560
    if (at::globalContext().hasCUDA()) {
      return at::detail::getCUDAHooks().getPinnedMemoryAllocator();
    } else if (at::globalContext().hasXPU()) {
      return at::detail::getXPUHooks().getPinnedMemoryAllocator();
    } else if (at::isPrivateUse1HooksRegistered()) {
      return at::detail::getPrivateUse1Hooks().getPinnedMemoryAllocator();
    } else {
      TORCH_CHECK(false, "Need to provide pin_memory allocator to use pin memory.")
    }
  }
  return c10::GetCPUAllocator();
}
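
// Illustrative note (added, not in the original source): this helper is only
// reached from the empty_cpu/empty_strided_cpu overloads below. A call such as
//   at::detail::empty_cpu(sizes, at::kFloat, /*pin_memory=*/true, std::nullopt);
// ends up here and returns the pinned-memory allocator of whichever backend is
// available, instead of the default CPU allocator.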

#ifndef C10_MOBILE
constexpr uint64_t storage_max() {
  // int64_t and size_t are used somewhat inconsistently throughout ATen.
  // To be safe, storage size calculations must fit in both types.
  constexpr auto int64_max = static_cast<uint64_t>(
      std::numeric_limits<int64_t>::max());
  constexpr auto size_max = static_cast<uint64_t>(
      std::numeric_limits<size_t>::max());
  return std::min(int64_max, size_max);
}
#endif
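
// Illustrative note (added, not in the original source): on a typical 64-bit
// build size_t is 64 bits wide, so storage_max() evaluates to INT64_MAX
// (2^63 - 1); on a 32-bit build it evaluates to SIZE_MAX (2^32 - 1), since
// that is the smaller of the two limits.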

inline void raise_warning_for_complex_half(ScalarType dtype) {
  if (dtype == kComplexHalf) {
    TORCH_WARN_ONCE(
        "ComplexHalf support is experimental and many operators don't support it yet.");
  }
}

}  // namespace (anonymous)

size_t computeStorageNbytesContiguous(
    IntArrayRef sizes,
    size_t itemsize_bytes,
    size_t storage_offset
  ) {
  // Ignore overflow checks on mobile
#ifndef C10_MOBILE
  uint64_t size = 1;
  bool overflowed = c10::safe_multiplies_u64(sizes, &size);
  overflowed |= c10::add_overflows(size, storage_offset, &size);
  overflowed |= c10::mul_overflows(size, itemsize_bytes, &size);
  overflowed |= size > storage_max();
  TORCH_CHECK(!overflowed,
              "Storage size calculation overflowed with sizes=", sizes);
  return static_cast<size_t>(size);
#else
  const auto numel = c10::multiply_integers(sizes);
  return itemsize_bytes * (storage_offset + numel);
#endif
}
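
// Worked example (illustrative, not part of the original source): for a
// contiguous float tensor with sizes = {2, 3}, itemsize_bytes = 4 and
// storage_offset = 1, numel is 2 * 3 = 6, so the storage must hold
// (6 + 1) * 4 = 28 bytes. The non-mobile branch above computes the same
// value, with every multiply and add checked for overflow.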

size_t computeStorageNbytes(
    IntArrayRef sizes,
    IntArrayRef strides,
    size_t itemsize_bytes,
    size_t storage_offset
  ) {
  TORCH_CHECK(
    sizes.size() == strides.size(),
    "dimensionality of sizes (",
    sizes.size(),
    ") must match dimensionality of strides (",
    strides.size(),
    ")");

  // Ignore overflow checks on mobile
#ifndef C10_MOBILE
  // size of the underlying storage is 1 bigger than the offset
  // of the last element according to stride
  uint64_t size = storage_offset + 1;
  bool overflowed = false;
  for (const auto i : c10::irange(sizes.size())) {
    if (sizes[i] == 0) {
      return 0;
    }

    uint64_t strided_size = 0;
    overflowed |= c10::mul_overflows(strides[i], sizes[i] - 1, &strided_size);
    overflowed |= c10::add_overflows(size, strided_size, &size);
  }
  overflowed |= c10::mul_overflows(size, itemsize_bytes, &size);
  overflowed |= size > storage_max();
  TORCH_CHECK(!overflowed,
              "Storage size calculation overflowed with sizes=",
              sizes, " and strides=", strides);
  return static_cast<size_t>(size);
#else
  // size of the underlying storage is 1 bigger than the offset
  // of the last element according to stride
  uint64_t size = 1;
  for (const auto i : c10::irange(sizes.size())) {
    if (sizes[i] == 0) {
      return 0;
    }

    size += strides[i] * (sizes[i] - 1);
  }
  return itemsize_bytes * (storage_offset + size);
#endif
}
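
// Worked example (illustrative, not part of the original source): for
// sizes = {2, 3}, strides = {3, 1}, itemsize_bytes = 4 and storage_offset = 0,
// the farthest element lives at offset (2-1)*3 + (3-1)*1 = 5, so the storage
// must hold (5 + 1) * 4 = 24 bytes. The transpose, sizes = {3, 2} with
// strides = {1, 3}, yields the same answer, since (3-1)*1 + (2-1)*3 = 5 too.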

SymInt computeStorageNbytesContiguous(
    SymIntArrayRef sizes,
    const SymInt& itemsize_bytes,
    const SymInt& storage_offset
  ) {
  const auto numel = c10::multiply_integers(sizes);
  return itemsize_bytes * (storage_offset + numel);
}

// not including mobile-only macros in this function,
// since mobile shouldn't be using symints.
SymInt computeStorageNbytes(
    SymIntArrayRef sizes,
    SymIntArrayRef strides,
    const SymInt& itemsize_bytes,
    const SymInt& storage_offset
  ) {
  TORCH_CHECK(
    sizes.size() == strides.size(),
    "dimensionality of sizes (",
    sizes.size(),
    ") must match dimensionality of strides (",
    strides.size(),
    ")");

  // size of the underlying storage is 1 bigger than the offset
  // of the last element according to stride
  SymInt size = 1;
  for (const auto i : c10::irange(sizes.size())) {
    if (TORCH_GUARD_SIZE_OBLIVIOUS(sizes[i].sym_eq(0))) {
      return 0;
    }

    size += strides[i] * (sizes[i] - 1);
  }
  return itemsize_bytes * (storage_offset + size);
}
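
// Illustrative note (added, not in the original source): with symbolic shapes
// the same arithmetic is carried out on SymInts, so the result stays symbolic.
// For sizes = {s0, s1} with contiguous strides = {s1, 1} and a zero
// storage_offset, size = 1 + (s0 - 1)*s1 + (s1 - 1) = s0*s1, so the returned
// byte count is itemsize_bytes * s0 * s1. The TORCH_GUARD_SIZE_OBLIVIOUS
// wrapper lets the zero-size early return fire only when a dimension is
// provably zero, without forcing a guard on unbacked symbols.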

template <typename T>
TensorBase _empty_generic(
    ArrayRef<T> size,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type,
    std::optional<c10::MemoryFormat> memory_format_opt) {
  at::detail::check_size_nonnegative(size);
  at::detail::raise_warning_for_complex_half(scalar_type);
  caffe2::TypeMeta dtype = scalarTypeToTypeMeta(scalar_type);
  auto size_bytes = computeStorageNbytesContiguous(size, dtype.itemsize());
  auto storage_impl = c10::make_intrusive<StorageImpl>(
      c10::StorageImpl::use_byte_size_t(),
      size_bytes,
      allocator,
      /*resizeable=*/true);

  auto tensor = detail::make_tensor_base<TensorImpl>(
      std::move(storage_impl), ks, dtype);
  // Default TensorImpl has size [0]
  // NB: test for meta dispatch key to avoid guarding on zero-ness
  if (ks.has(c10::DispatchKey::Meta) || size.size() != 1 || size[0] != 0) {
    tensor.unsafeGetTensorImpl()->generic_set_sizes_contiguous(size);
  }

  if (memory_format_opt.has_value()) {
    // Restriding a just-created empty contiguous tensor does nothing.
    if (*memory_format_opt != MemoryFormat::Contiguous) {
      tensor.unsafeGetTensorImpl()->empty_tensor_restride(*memory_format_opt);
    }
  }

  return tensor;
}
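
// Illustrative summary (added, not in the original source): every empty_*
// entry point below funnels into _empty_generic / _empty_strided_generic with
// three ingredients: an allocator, a DispatchKeySet, and a ScalarType. The CPU
// path is roughly
//   auto* alloc = GetCPUAllocatorMaybePinned(/*pin_memory=*/false);
//   auto t = _empty_generic(IntArrayRef{2, 3}, alloc,
//                           c10::DispatchKeySet(c10::DispatchKey::CPU),
//                           at::kFloat, std::nullopt);
// which reserves 24 bytes of uninitialized CPU memory and sets sizes {2, 3}.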

TensorBase empty_generic(
    IntArrayRef size,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type,
    std::optional<c10::MemoryFormat> memory_format_opt) {
  return _empty_generic(size, allocator, ks, scalar_type, memory_format_opt);
}

TensorBase empty_generic_symint(
    SymIntArrayRef size,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type,
    std::optional<c10::MemoryFormat> memory_format_opt) {
  return _empty_generic(size, allocator, ks, scalar_type, memory_format_opt);
}

template <typename T>
TensorBase _empty_strided_generic(
    T size,
    T stride,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type) {
  at::detail::check_size_nonnegative(size);
  at::detail::raise_warning_for_complex_half(scalar_type);
  caffe2::TypeMeta dtype = scalarTypeToTypeMeta(scalar_type);
  auto size_bytes = computeStorageNbytes(size, stride, dtype.itemsize());
  auto storage_impl = c10::make_intrusive<StorageImpl>(
      c10::StorageImpl::use_byte_size_t(),
      size_bytes,
      allocator,
      /*resizeable=*/true);

  auto tensor = detail::make_tensor_base<TensorImpl>(
      std::move(storage_impl), ks, dtype);
  tensor.unsafeGetTensorImpl()->set_sizes_and_strides(size, stride);
  return tensor;
}

TensorBase empty_strided_generic(
    IntArrayRef size,
    IntArrayRef stride,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type) {
  return _empty_strided_generic<IntArrayRef>(size, stride, allocator, ks, scalar_type);
}

TensorBase empty_strided_symint_generic(
    SymIntArrayRef size,
    SymIntArrayRef stride,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type) {
  return _empty_strided_generic<SymIntArrayRef>(size, stride, allocator, ks, scalar_type);
}

TensorBase empty_cpu(IntArrayRef size, ScalarType dtype, bool pin_memory,
                     std::optional<c10::MemoryFormat> memory_format_opt) {
  auto allocator = GetCPUAllocatorMaybePinned(pin_memory);
  constexpr c10::DispatchKeySet cpu_ks(c10::DispatchKey::CPU);
  return empty_generic(size, allocator, cpu_ks, dtype, memory_format_opt);
}
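
// Illustrative usage (added, not in the original source): internal callers
// that already know the dtype and pin_memory flag can use this overload
// directly, e.g.
//   auto t = at::detail::empty_cpu({4, 4}, at::kFloat,
//                                  /*pin_memory=*/false,
//                                  c10::MemoryFormat::Contiguous);
// The overloads below merely unpack optional dtype/layout/device/pin_memory
// arguments (or a TensorOptions bundle) and forward here.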

TensorBase empty_cpu(
    IntArrayRef size,
    std::optional<ScalarType> dtype_opt,
    std::optional<Layout> layout_opt,
    std::optional<Device> device_opt,
    std::optional<bool> pin_memory_opt,
    std::optional<c10::MemoryFormat> memory_format_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::CPU);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);

  auto pin_memory = pinned_memory_or_default(pin_memory_opt);
  auto dtype = dtype_or_default(dtype_opt);
  return empty_cpu(size, dtype, pin_memory, memory_format_opt);
}

TensorBase empty_cpu(
    IntArrayRef size, const TensorOptions &options) {
  return at::detail::empty_cpu(
      size,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt(),
      options.memory_format_opt());
}

TensorBase empty_strided_cpu(IntArrayRef size, IntArrayRef stride,
                             ScalarType dtype, bool pin_memory) {
  auto allocator = at::detail::GetCPUAllocatorMaybePinned(pin_memory);
  constexpr c10::DispatchKeySet cpu_ks(c10::DispatchKey::CPU);
  return at::detail::empty_strided_generic(
      size, stride, allocator, cpu_ks, dtype);
}

TensorBase empty_strided_cpu(
    IntArrayRef size,
    IntArrayRef stride,
    std::optional<ScalarType> dtype_opt,
    std::optional<Layout> layout_opt,
    std::optional<Device> device_opt,
    std::optional<bool> pin_memory_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::CPU);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);

  auto pin_memory = pinned_memory_or_default(pin_memory_opt);
  auto dtype = dtype_or_default(dtype_opt);
  return at::detail::empty_strided_cpu(size, stride, dtype, pin_memory);
}

TensorBase empty_strided_cpu(
    IntArrayRef size,
    IntArrayRef stride,
    const TensorOptions &options) {
  return at::detail::empty_strided_cpu(
      size,
      stride,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt());
}

// The meta allocator ignores whatever allocation is requested and always
// gives you nullptr
struct MetaAllocator final : public at::Allocator {
  MetaAllocator() = default;
  ~MetaAllocator() override = default;
  static void deleter(void* const pointer) {
    TORCH_INTERNAL_ASSERT(!pointer);
  }
  DataPtr allocate(const size_t nbytes [[maybe_unused]]) override {
    return {nullptr, nullptr, &deleter, at::Device(DeviceType::Meta)};
  }
  DeleterFnPtr raw_deleter() const override {
    return deleter;
  }
  void copy_data(void* dest, const void* src, std::size_t count) const final {}
};

static MetaAllocator g_meta_alloc;

REGISTER_ALLOCATOR(kMeta, &g_meta_alloc);
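
// Illustrative note (added, not in the original source): because allocate()
// above always hands back a DataPtr whose pointer is nullptr, a meta tensor
// gets a real StorageImpl with a byte size but no backing memory, and the
// deleter asserts it is only ever asked to free nullptr. Meta tensors
// therefore carry shape, stride and dtype information only, which is what
// makes them useful for shape inference without touching memory.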

TensorBase empty_meta(IntArrayRef size, ScalarType dtype,
                     std::optional<c10::MemoryFormat> memory_format_opt) {
  auto *allocator = GetAllocator(kMeta);
  constexpr c10::DispatchKeySet meta_dks(c10::DispatchKey::Meta);
  return at::detail::empty_generic(
      size, allocator, meta_dks, dtype, memory_format_opt);
}
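
// Illustrative usage (added, not in the original source):
//   auto t = at::detail::empty_meta({1024, 1024}, at::kFloat, std::nullopt);
// produces a tensor on DeviceType::Meta whose storage reports
// 1024 * 1024 * 4 bytes but owns no data at all.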

TensorBase empty_meta(
  IntArrayRef size,
  std::optional<ScalarType> dtype_opt,
  std::optional<Layout> layout_opt,
  std::optional<Device> device_opt,
  std::optional<bool> pin_memory_opt,
  std::optional<c10::MemoryFormat> memory_format_opt
) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::Meta);
  // NB: because there is no SparseMeta (yet), a non-strided layout request is
  // actually exercisable here, so raise a user-facing error rather than an
  // internal assert.
  TORCH_CHECK_NOT_IMPLEMENTED(
    layout_or_default(layout_opt) == Layout::Strided,
    "non-strided meta tensors not supported yet"
  );

  auto dtype = dtype_or_default(dtype_opt);
  return empty_meta(size, dtype, memory_format_opt);
}

TensorBase empty_symint_meta(
  SymIntArrayRef size,
  std::optional<ScalarType> dtype_opt,
  std::optional<Layout> layout_opt,
  std::optional<Device> device_opt,
  std::optional<bool> pin_memory_opt,
  std::optional<c10::MemoryFormat> memory_format_opt
) {
  auto *allocator = GetAllocator(kMeta);
  constexpr c10::DispatchKeySet ks(c10::DispatchKey::Meta);
  auto scalar_type = dtype_or_default(dtype_opt);
  return _empty_generic(size, allocator, ks, scalar_type, memory_format_opt);
}

TensorBase empty_meta(
    IntArrayRef size, const TensorOptions &options) {
  return at::detail::empty_meta(
      size,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt(),
      options.memory_format_opt());
}

TensorBase empty_strided_meta(IntArrayRef size, IntArrayRef stride,
                              ScalarType dtype) {
  auto *allocator = GetAllocator(kMeta);
  constexpr c10::DispatchKeySet meta_dks(c10::DispatchKey::Meta);
  return at::detail::empty_strided_generic(
      size, stride, allocator, meta_dks, dtype);
}

TensorBase empty_strided_meta(
    IntArrayRef size,
    IntArrayRef stride,
    std::optional<ScalarType> dtype_opt,
    std::optional<Layout> layout_opt,
    std::optional<Device> device_opt,
    std::optional<bool> pin_memory_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::Meta);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);

  auto dtype = dtype_or_default(dtype_opt);
  return at::detail::empty_strided_meta(size, stride, dtype);
}

TensorBase empty_strided_meta(
    IntArrayRef size,
    IntArrayRef stride,
    const TensorOptions &options) {
  return at::detail::empty_strided_meta(
      size,
      stride,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt());
}

TensorBase empty_strided_symint_meta(SymIntArrayRef size, SymIntArrayRef stride,
                              ScalarType dtype) {
  auto *allocator = GetAllocator(kMeta);
  constexpr c10::DispatchKeySet meta_dks(c10::DispatchKey::Meta);
  return at::detail::empty_strided_symint_generic(
      size, stride, allocator, meta_dks, dtype);
}

TensorBase empty_strided_symint_meta(
    SymIntArrayRef size,
    SymIntArrayRef stride,
    std::optional<ScalarType> dtype_opt,
    std::optional<Layout> layout_opt,
    std::optional<Device> device_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::Meta);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);

  auto dtype = dtype_or_default(dtype_opt);
  return at::detail::empty_strided_symint_meta(size, stride, dtype);
}

TensorBase empty_strided_symint_meta(
    SymIntArrayRef size,
    SymIntArrayRef stride,
    const TensorOptions &options) {
  return at::detail::empty_strided_symint_meta(
      size,
      stride,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt());
}

} // namespace at::detail