// aten/src/ATen/native/cuda/TensorShapeCUDA.cpp
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/core/Tensor.h>
#include <ATen/cuda/CUDAContext.h>
#include <ATen/native/Resize.h>
#include <ATen/native/cuda/Resize.h>

#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/NativeFunctions.h>
#else
#include <ATen/ops/set_native.h>
#endif

namespace at::native {

// this needs to be split along CPU/CUDA lines because we don't have a consistent
// way of getting the allocator to use for a device (c10::GetAllocator is not
// the same as at::cuda::getCUDADeviceAllocator()).
Tensor& set_cuda_(Tensor& result) {
  caffe2::TypeMeta dtype = result.dtype();
  // Back the tensor with a fresh, zero-byte, resizable storage obtained from
  // the CUDA device allocator, then reset it to an empty 1-D tensor.
  Storage storage(
      Storage::use_byte_size_t(),
      0,
      at::cuda::getCUDADeviceAllocator(),
      /*resizable=*/true);
  result.set_(storage, 0, {0}, {});
  // set_() must preserve the tensor's dtype.
  TORCH_INTERNAL_ASSERT(dtype == result.dtype());
  return result;
}
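
// Usage sketch (illustrative, not part of the original file): calling
// Tensor::set_() with no arguments on a CUDA tensor is routed here and
// detaches it from its old storage, leaving an empty tensor of the same dtype:
//
//   at::Tensor t = at::ones({2, 3}, at::kCUDA);
//   t.set_();                                    // dispatches to set_cuda_
//   TORCH_INTERNAL_ASSERT(t.numel() == 0 && t.dim() == 1);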

// unify with the CPU implementation?  This is not done, to avoid a dispatch in resize_impl_cpu_
Tensor& set_storage_cuda_(Tensor& result, Storage storage, int64_t storage_offset, IntArrayRef size, IntArrayRef stride) {
  checkSetStorage(result, storage, storage_offset, size, stride);

  result.unsafeGetTensorImpl()->set_storage_offset(storage_offset);
  // An empty stride array means the caller did not supply strides; pass
  // nullopt so the resize computes contiguous strides for the requested size.
  at::OptionalIntArrayRef stride_opt = stride.data() != nullptr ?
                                          at::OptionalIntArrayRef(stride) : std::nullopt;
  at::native::resize_impl_cuda_(result.unsafeGetTensorImpl(), size, stride_opt);
  return result;
}
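
// Usage sketch (illustrative, not part of the original file): the
// Storage-taking overload of Tensor::set_() lands here for CUDA tensors and
// can be used to alias a slice of another tensor's storage:
//
//   at::Tensor src = at::arange(12, at::device(at::kCUDA).dtype(at::kFloat));
//   at::Tensor view = at::empty({0}, src.options());
//   view.set_(src.storage(), /*storage_offset=*/4, /*size=*/{2, 2}, /*stride=*/{2, 1});
//   // view now reads elements 4..7 of src's storage as a 2x2 tensor.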

} // namespace at::native