#define TORCH_ASSERT_NO_OPERATORS
#include <ATen/EmptyTensor.h>
#include <ATen/detail/CUDAHooksInterface.h>
#include <ATen/detail/XPUHooksInterface.h>
#include <ATen/Context.h>
#include <ATen/detail/PrivateUse1HooksInterface.h>
#include <c10/core/CPUAllocator.h>
#include <c10/util/safe_numerics.h>

#include <limits>

namespace at::detail {
namespace {
c10::Allocator* GetCPUAllocatorMaybePinned(bool pin_memory) {
  if (pin_memory) {
    // NB: This is not quite right: if you somehow had both CUDA and PrivateUse1
    // initialized in the same PyTorch build, you would ONLY ever get the CUDA
    // pinned memory allocator. To properly support this, see
    // https://github.com/pytorch/pytorch/issues/14560
    if (at::globalContext().hasCUDA()) {
      return at::detail::getCUDAHooks().getPinnedMemoryAllocator();
    } else if (at::globalContext().hasXPU()) {
      return at::detail::getXPUHooks().getPinnedMemoryAllocator();
    } else if (at::isPrivateUse1HooksRegistered()) {
      return at::detail::getPrivateUse1Hooks().getPinnedMemoryAllocator();
    } else {
      TORCH_CHECK(false, "Need to provide pin_memory allocator to use pin memory.")
    }
  }
  return c10::GetCPUAllocator();
}
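
// Illustrative usage (a sketch, not part of the build): on a CUDA-enabled
// build, a pinned-memory request such as
//   auto t = at::detail::empty_cpu({1024}, at::kFloat, /*pin_memory=*/true,
//                                  std::nullopt);
// ends up allocating page-locked host memory through
// getCUDAHooks().getPinnedMemoryAllocator(); if no accelerator hooks are
// available, the TORCH_CHECK above fires instead.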

#ifndef C10_MOBILE
constexpr uint64_t storage_max() {
  // int64_t and size_t are used somewhat inconsistently throughout ATen.
  // To be safe, storage size calculations must fit in both types.
  constexpr auto int64_max = static_cast<uint64_t>(
      std::numeric_limits<int64_t>::max());
  constexpr auto size_max = static_cast<uint64_t>(
      std::numeric_limits<size_t>::max());
  return std::min(int64_max, size_max);
}
#endif

inline void raise_warning_for_complex_half(ScalarType dtype) {
  if (dtype == kComplexHalf) {
    TORCH_WARN_ONCE(
        "ComplexHalf support is experimental and many operators don't support it yet.");
  }
}

} // namespace (anonymous)

size_t computeStorageNbytesContiguous(
    IntArrayRef sizes,
    size_t itemsize_bytes,
    size_t storage_offset
  ) {
  // Ignore overflow checks on mobile
#ifndef C10_MOBILE
  uint64_t size = 1;
  bool overflowed = c10::safe_multiplies_u64(sizes, &size);
  overflowed |= c10::add_overflows(size, storage_offset, &size);
  overflowed |= c10::mul_overflows(size, itemsize_bytes, &size);
  overflowed |= size > storage_max();
  TORCH_CHECK(!overflowed,
              "Storage size calculation overflowed with sizes=", sizes);
  return static_cast<size_t>(size);
#else
  const auto numel = c10::multiply_integers(sizes);
  return itemsize_bytes * (storage_offset + numel);
#endif
}
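
// Worked example (illustrative only; the numbers are made up): for
// sizes = {2, 3}, itemsize_bytes = 4, and storage_offset = 2, the element
// count is 2 * 3 = 6, so the contiguous storage needs (2 + 6) * 4 = 32 bytes.
// The non-mobile branch performs exactly this arithmetic, but with every
// step checked for overflow against storage_max().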

size_t computeStorageNbytes(
    IntArrayRef sizes,
    IntArrayRef strides,
    size_t itemsize_bytes,
    size_t storage_offset
  ) {
  TORCH_CHECK(
      sizes.size() == strides.size(),
      "dimensionality of sizes (",
      sizes.size(),
      ") must match dimensionality of strides (",
      strides.size(),
      ")");

  // Ignore overflow checks on mobile
#ifndef C10_MOBILE
  // size of the underlying storage is 1 bigger than the offset
  // of the last element according to stride
  uint64_t size = storage_offset + 1;
  bool overflowed = false;
  for (const auto i : c10::irange(sizes.size())) {
    if (sizes[i] == 0) {
      return 0;
    }

    uint64_t strided_size = 0;
    overflowed |= c10::mul_overflows(strides[i], sizes[i] - 1, &strided_size);
    overflowed |= c10::add_overflows(size, strided_size, &size);
  }
  overflowed |= c10::mul_overflows(size, itemsize_bytes, &size);
  overflowed |= size > storage_max();
  TORCH_CHECK(!overflowed,
              "Storage size calculation overflowed with sizes=",
              sizes, " and strides=", strides);
  return static_cast<size_t>(size);
#else
  // size of the underlying storage is 1 bigger than the offset
  // of the last element according to stride
  uint64_t size = 1;
  for (const auto i : c10::irange(sizes.size())) {
    if (sizes[i] == 0) {
      return 0;
    }

    size += strides[i] * (sizes[i] - 1);
  }
  return itemsize_bytes * (storage_offset + size);
#endif
}
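
// Worked example (illustrative only): sizes = {2, 3}, strides = {3, 1},
// itemsize_bytes = 8, storage_offset = 0. The farthest reachable element
// sits at offset 3 * (2 - 1) + 1 * (3 - 1) = 5, so the storage must hold
// 0 + 5 + 1 = 6 elements, i.e. 6 * 8 = 48 bytes. If any dimension has
// size 0, the loop above short-circuits and the storage needs 0 bytes.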

SymInt computeStorageNbytesContiguous(
    SymIntArrayRef sizes,
    const SymInt& itemsize_bytes,
    const SymInt& storage_offset
  ) {
  const auto numel = c10::multiply_integers(sizes);
  return itemsize_bytes * (storage_offset + numel);
}

// not including mobile-only macros in this function,
// since mobile shouldn't be using symints.
SymInt computeStorageNbytes(
    SymIntArrayRef sizes,
    SymIntArrayRef strides,
    const SymInt& itemsize_bytes,
    const SymInt& storage_offset
  ) {
  TORCH_CHECK(
      sizes.size() == strides.size(),
      "dimensionality of sizes (",
      sizes.size(),
      ") must match dimensionality of strides (",
      strides.size(),
      ")");

  // size of the underlying storage is 1 bigger than the offset
  // of the last element according to stride
  SymInt size = 1;
  for (const auto i : c10::irange(sizes.size())) {
    if (TORCH_GUARD_SIZE_OBLIVIOUS(sizes[i].sym_eq(0))) {
      return 0;
    }

    size += strides[i] * (sizes[i] - 1);
  }
  return itemsize_bytes * (storage_offset + size);
}
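
// Symbolic worked example (a sketch; s0 and s1 are placeholder symbols):
// with sizes = {s0, s1}, strides = {s1, 1}, itemsize_bytes = 4, and
// storage_offset = 0, and assuming both sizes are known to be nonzero,
// the loop yields size = 1 + s1 * (s0 - 1) + (s1 - 1) = s0 * s1, so the
// returned SymInt is the expression 4 * s0 * s1.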

template <typename T>
TensorBase _empty_generic(
    ArrayRef<T> size,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type,
    std::optional<c10::MemoryFormat> memory_format_opt) {
  at::detail::check_size_nonnegative(size);
  at::detail::raise_warning_for_complex_half(scalar_type);
  caffe2::TypeMeta dtype = scalarTypeToTypeMeta(scalar_type);
  auto size_bytes = computeStorageNbytesContiguous(size, dtype.itemsize());
  auto storage_impl = c10::make_intrusive<StorageImpl>(
      c10::StorageImpl::use_byte_size_t(),
      size_bytes,
      allocator,
      /*resizeable=*/true);

  auto tensor = detail::make_tensor_base<TensorImpl>(
      std::move(storage_impl), ks, dtype);
  // Default TensorImpl has size [0]
  // NB: test for meta dispatch key to avoid guarding on zero-ness
  if (ks.has(c10::DispatchKey::Meta) || size.size() != 1 || size[0] != 0) {
    tensor.unsafeGetTensorImpl()->generic_set_sizes_contiguous(size);
  }

  if (memory_format_opt.has_value()) {
    // Restriding a just-created empty contiguous tensor does nothing.
    if (*memory_format_opt != MemoryFormat::Contiguous) {
      tensor.unsafeGetTensorImpl()->empty_tensor_restride(*memory_format_opt);
    }
  }

  return tensor;
}

TensorBase empty_generic(
    IntArrayRef size,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type,
    std::optional<c10::MemoryFormat> memory_format_opt) {
  return _empty_generic(size, allocator, ks, scalar_type, memory_format_opt);
}

TensorBase empty_generic_symint(
    SymIntArrayRef size,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type,
    std::optional<c10::MemoryFormat> memory_format_opt) {
  return _empty_generic(size, allocator, ks, scalar_type, memory_format_opt);
}

template <typename T>
TensorBase _empty_strided_generic(
    T size,
    T stride,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type) {
  at::detail::check_size_nonnegative(size);
  at::detail::raise_warning_for_complex_half(scalar_type);
  caffe2::TypeMeta dtype = scalarTypeToTypeMeta(scalar_type);
  auto size_bytes = computeStorageNbytes(size, stride, dtype.itemsize());
  auto storage_impl = c10::make_intrusive<StorageImpl>(
      c10::StorageImpl::use_byte_size_t(),
      size_bytes,
      allocator,
      /*resizeable=*/true);

  auto tensor = detail::make_tensor_base<TensorImpl>(
      std::move(storage_impl), ks, dtype);
  tensor.unsafeGetTensorImpl()->set_sizes_and_strides(size, stride);
  return tensor;
}

TensorBase empty_strided_generic(
    IntArrayRef size,
    IntArrayRef stride,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type) {
  return _empty_strided_generic<IntArrayRef>(size, stride, allocator, ks, scalar_type);
}

TensorBase empty_strided_symint_generic(
    SymIntArrayRef size,
    SymIntArrayRef stride,
    c10::Allocator* allocator,
    c10::DispatchKeySet ks,
    ScalarType scalar_type) {
  return _empty_strided_generic<SymIntArrayRef>(size, stride, allocator, ks, scalar_type);
}

TensorBase empty_cpu(IntArrayRef size, ScalarType dtype, bool pin_memory,
                     std::optional<c10::MemoryFormat> memory_format_opt) {
  auto allocator = GetCPUAllocatorMaybePinned(pin_memory);
  constexpr c10::DispatchKeySet cpu_ks(c10::DispatchKey::CPU);
  return empty_generic(size, allocator, cpu_ks, dtype, memory_format_opt);
}
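
// Illustrative call (a sketch; the shape and dtype are arbitrary):
//   auto t = at::detail::empty_cpu({2, 3}, at::kFloat, /*pin_memory=*/false,
//                                  c10::MemoryFormat::Contiguous);
// produces an uninitialized, contiguous 2x3 float tensor backed by the
// default CPU allocator; passing pin_memory=true would route the allocation
// through GetCPUAllocatorMaybePinned() above.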

TensorBase empty_cpu(
    IntArrayRef size,
    std::optional<ScalarType> dtype_opt,
    std::optional<Layout> layout_opt,
    std::optional<Device> device_opt,
    std::optional<bool> pin_memory_opt,
    std::optional<c10::MemoryFormat> memory_format_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::CPU);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);

  auto pin_memory = pinned_memory_or_default(pin_memory_opt);
  auto dtype = dtype_or_default(dtype_opt);
  return empty_cpu(size, dtype, pin_memory, memory_format_opt);
}

TensorBase empty_cpu(
    IntArrayRef size, const TensorOptions &options) {
  return at::detail::empty_cpu(
      size,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt(),
      options.memory_format_opt());
}

TensorBase empty_strided_cpu(IntArrayRef size, IntArrayRef stride,
                             ScalarType dtype, bool pin_memory) {
  auto allocator = at::detail::GetCPUAllocatorMaybePinned(pin_memory);
  constexpr c10::DispatchKeySet cpu_ks(c10::DispatchKey::CPU);
  return at::detail::empty_strided_generic(
      size, stride, allocator, cpu_ks, dtype);
}

TensorBase empty_strided_cpu(
    IntArrayRef size,
    IntArrayRef stride,
    std::optional<ScalarType> dtype_opt,
    std::optional<Layout> layout_opt,
    std::optional<Device> device_opt,
    std::optional<bool> pin_memory_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::CPU);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);

  auto pin_memory = pinned_memory_or_default(pin_memory_opt);
  auto dtype = dtype_or_default(dtype_opt);
  return at::detail::empty_strided_cpu(size, stride, dtype, pin_memory);
}

TensorBase empty_strided_cpu(
    IntArrayRef size,
    IntArrayRef stride,
    const TensorOptions &options) {
  return at::detail::empty_strided_cpu(
      size,
      stride,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt());
}

// The meta allocator ignores whatever allocation is requested and always
// gives you nullptr
struct MetaAllocator final : public at::Allocator {
  MetaAllocator() = default;
  ~MetaAllocator() override = default;
  static void deleter(void* const pointer) {
    TORCH_INTERNAL_ASSERT(!pointer);
  }
  DataPtr allocate(const size_t nbytes [[maybe_unused]]) override {
    return {nullptr, nullptr, &deleter, at::Device(DeviceType::Meta)};
  }
  DeleterFnPtr raw_deleter() const override {
    return deleter;
  }
  void copy_data(void* dest, const void* src, std::size_t count) const final {}
};

static MetaAllocator g_meta_alloc;

REGISTER_ALLOCATOR(kMeta, &g_meta_alloc);
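
// Illustrative consequence (a sketch, not part of the build): meta tensors
// carry only metadata (sizes, strides, dtype); their storage's DataPtr is
// null because MetaAllocator::allocate() always hands back nullptr. E.g.
//   auto m = at::detail::empty_meta({2, 3}, at::kFloat, std::nullopt);
// builds a 2x3 "shape-only" tensor that never touches real memory.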

TensorBase empty_meta(IntArrayRef size, ScalarType dtype,
                      std::optional<c10::MemoryFormat> memory_format_opt) {
  auto *allocator = GetAllocator(kMeta);
  constexpr c10::DispatchKeySet meta_dks(c10::DispatchKey::Meta);
  return at::detail::empty_generic(
      size, allocator, meta_dks, dtype, memory_format_opt);
}

TensorBase empty_meta(
    IntArrayRef size,
    std::optional<ScalarType> dtype_opt,
    std::optional<Layout> layout_opt,
    std::optional<Device> device_opt,
    std::optional<bool> pin_memory_opt,
    std::optional<c10::MemoryFormat> memory_format_opt
) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::Meta);
  // NB: because there is no SparseMeta (yet), non-strided layout is
  // exercisable
  TORCH_CHECK_NOT_IMPLEMENTED(
      layout_or_default(layout_opt) == Layout::Strided,
      "non-strided meta tensors not supported yet"
  );

  auto dtype = dtype_or_default(dtype_opt);
  return empty_meta(size, dtype, memory_format_opt);
}

TensorBase empty_symint_meta(
    SymIntArrayRef size,
    std::optional<ScalarType> dtype_opt,
    std::optional<Layout> layout_opt,
    std::optional<Device> device_opt,
    std::optional<bool> pin_memory_opt,
    std::optional<c10::MemoryFormat> memory_format_opt
) {
  auto *allocator = GetAllocator(kMeta);
  constexpr c10::DispatchKeySet ks(c10::DispatchKey::Meta);
  auto scalar_type = dtype_or_default(dtype_opt);
  return _empty_generic(size, allocator, ks, scalar_type, memory_format_opt);
}

TensorBase empty_meta(
    IntArrayRef size, const TensorOptions &options) {
  return at::detail::empty_meta(
      size,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt(),
      options.memory_format_opt());
}

TensorBase empty_strided_meta(IntArrayRef size, IntArrayRef stride,
                              ScalarType dtype) {
  auto *allocator = GetAllocator(kMeta);
  constexpr c10::DispatchKeySet meta_dks(c10::DispatchKey::Meta);
  return at::detail::empty_strided_generic(
      size, stride, allocator, meta_dks, dtype);
}

TensorBase empty_strided_meta(
    IntArrayRef size,
    IntArrayRef stride,
    std::optional<ScalarType> dtype_opt,
    std::optional<Layout> layout_opt,
    std::optional<Device> device_opt,
    std::optional<bool> pin_memory_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::Meta);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);

  auto dtype = dtype_or_default(dtype_opt);
  return at::detail::empty_strided_meta(size, stride, dtype);
}

TensorBase empty_strided_meta(
    IntArrayRef size,
    IntArrayRef stride,
    const TensorOptions &options) {
  return at::detail::empty_strided_meta(
      size,
      stride,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt(),
      options.pinned_memory_opt());
}
TensorBase empty_strided_symint_meta(SymIntArrayRef size, SymIntArrayRef stride,
                                     ScalarType dtype) {
  auto *allocator = GetAllocator(kMeta);
  constexpr c10::DispatchKeySet meta_dks(c10::DispatchKey::Meta);
  return at::detail::empty_strided_symint_generic(
      size, stride, allocator, meta_dks, dtype);
}

TensorBase empty_strided_symint_meta(
    SymIntArrayRef size,
    SymIntArrayRef stride,
    std::optional<ScalarType> dtype_opt,
    std::optional<Layout> layout_opt,
    std::optional<Device> device_opt) {
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(device_or_default(device_opt).type() == DeviceType::Meta);
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(layout_or_default(layout_opt) == Layout::Strided);

  auto dtype = dtype_or_default(dtype_opt);
  return at::detail::empty_strided_symint_meta(size, stride, dtype);
}

TensorBase empty_strided_symint_meta(
    SymIntArrayRef size,
    SymIntArrayRef stride,
    const TensorOptions &options) {
  return at::detail::empty_strided_symint_meta(
      size,
      stride,
      optTypeMetaToScalarType(options.dtype_opt()),
      options.layout_opt(),
      options.device_opt());
}

} // namespace at::detail