/aosp_15_r20/external/pytorch/aten/src/ATen/native/mkldnn/
  MKLDNNConversions.cpp
    in mkldnn_to_dense():
      51  Tensor cpu_tensor = at::empty(  [local]
      54  if (stensor.is_empty()) return cpu_tensor;
      57  ? stensor.to_public(cpu_tensor.template data_ptr<float>(),
      60  ? stensor.to_public(cpu_tensor.template data_ptr<BFloat16>(),
      63  ? stensor.to_public(cpu_tensor.template data_ptr<Half>(),
      66  ? stensor.to_public(cpu_tensor.template data_ptr<uint8_t>(),
      68  : stensor.to_public(cpu_tensor.template data_ptr<int8_t>(),
      73  cpu_tensor.as_strided_(dims, pub_tensor.get_strides());
      75  return cpu_tensor.contiguous().resize_(dims, c10::MemoryFormat::Contiguous);
    in dense_to_mkldnn():
      78  Tensor dense_to_mkldnn(const Tensor& cpu_tensor, std::optional<ScalarType> dtype) {  [argument]
    [all …]
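  These two conversions surface in Python as Tensor.to_mkldnn() and Tensor.to_dense(). A minimal round-trip sketch, assuming a PyTorch build with oneDNN (MKL-DNN) support:

      import torch

      if torch.backends.mkldnn.is_available():
          x = torch.randn(2, 3, dtype=torch.float32)
          y = x.to_mkldnn()         # dense_to_mkldnn(): repack into opaque oneDNN layout
          z = y.to_dense()          # mkldnn_to_dense(): copy back into a strided CPU tensor
          assert torch.equal(x, z)  # the round trip preserves values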
/aosp_15_r20/external/tensorflow/tensorflow/compiler/jit/
  xla_device_context.cc
    in CopyCPUTensorToDevice():
      114  void XlaDeviceContext::CopyCPUTensorToDevice(const Tensor* cpu_tensor,  [argument]
      119  if (cpu_tensor->NumElements() == 0) {
      126  << reinterpret_cast<const void*>(cpu_tensor->tensor_data().data())
      129  << " " << cpu_tensor->NumElements() << " "
      130  << cpu_tensor->shape().DebugString() << " "
      158  static_cast<const char*>(DMAHelper::base(cpu_tensor)),
      191  TensorReference ref(*cpu_tensor);
    in CopyDeviceTensorToCPU():
      211  Device* device, Tensor* cpu_tensor,  [argument]
      221  << reinterpret_cast<const void*>(cpu_tensor->tensor_data().data())
      223  << cpu_tensor->shape().DebugString() << " "
    [all …]
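  From Python these DeviceContext hooks run implicitly whenever an op needs its input on another device. A minimal sketch that forces a copy in each direction (assumes a GPU-enabled TensorFlow build; any accelerator device works the same way):

      import tensorflow as tf

      cpu_tensor = tf.constant([1.0, 2.0, 3.0])   # created on the host
      if tf.config.list_physical_devices("GPU"):
          with tf.device("/GPU:0"):
              device_tensor = tf.identity(cpu_tensor)  # triggers CopyCPUTensorToDevice
          round_tripped = device_tensor.numpy()        # triggers CopyDeviceTensorToCPU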
/aosp_15_r20/external/tensorflow/tensorflow/core/kernels/
  stack.cc
    in ComputeAsync():
      257  Tensor* cpu_tensor =  [local]
      260  &tensor, "StackPush", device, cpu_tensor,
      261  [cpu_tensor, stack, ctx, done](const Status& s) {
      265  ctx->SetStatus(stack->Push({*cpu_tensor, alloc_attrs, true}));
      268  ctx->set_output(0, *cpu_tensor);
      271  delete cpu_tensor;
      304  Tensor* cpu_tensor = &value.tensor;  [local]
      307  new Tensor(gpu_allocator, cpu_tensor->dtype(), cpu_tensor->shape());
      309  cpu_tensor, device, device_tensor,
  dynamic_partition_op_gpu.cu.cc
    in ComputeAsync():
      276  Tensor cpu_tensor;  [local]
      283  &cpu_tensor, alloc_attr),
      289  ->ThenMemcpy(cpu_tensor.flat<int32>().data(), wrapped,
      301  partition_ref, cpu_tensor, done]() {
      306  this->AllocateOutputs(c, &data, &partitions, &cpu_tensor, &outputs, done);
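  On GPU this kernel memcpy's the int32 partition information back into a host-side cpu_tensor before it can size the outputs, which is why the work finishes inside a callback. The op itself is public; a small usage sketch:

      import tensorflow as tf

      data = tf.constant([10, 20, 30, 40, 50])
      partitions = tf.constant([0, 1, 0, 1, 0])
      parts = tf.dynamic_partition(data, partitions, num_partitions=2)
      print([p.numpy() for p in parts])  # partition 0: [10, 30, 50], partition 1: [20, 40]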
  collective_nccl_test.cc
    in InitTensor():
      273  Tensor cpu_tensor(dtype, shape);  [local]
      274  init_f(&cpu_tensor);
      277  << cpu_tensor.DebugString(shape.num_elements());
      279  VLOG(2) << "input tensor " << cpu_tensor.DebugString();
      283  &cpu_tensor, device_, &input_));
/aosp_15_r20/external/tensorflow/tensorflow/core/distributed_runtime/
  collective_rma_distributed.cc
    in PopulateTensorFromExtra():
      72  Tensor* cpu_tensor) {  [argument]
      73  char* head = reinterpret_cast<char*>(DMAHelper::base(cpu_tensor));
    in PopulateTensorFromResponse():
      82  Tensor* cpu_tensor) {  [argument]
      89  const int64_t total_bytes = cpu_tensor->TotalBytes();
      100  " bytes, expected: ", cpu_tensor->TotalBytes());
      102  PopulateTensorFromExtra(extra, cpu_tensor);
    in RecvFromPeer():
      127  std::unique_ptr<Tensor> cpu_tensor;  [member]
      161  state->cpu_tensor =
      164  dst_tensor = state->cpu_tensor.get();
/aosp_15_r20/external/tensorflow/tensorflow/core/tpu/
  virtual_device.cc
      27  void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device,
      32  Tensor* cpu_tensor, StatusCallback done) override;
    in CopyCPUTensorToDevice():
      38  void VirtualDeviceContext::CopyCPUTensorToDevice(const Tensor* cpu_tensor,  [argument]
      43  *device_tensor = *cpu_tensor;
    in CopyDeviceTensorToCPU():
      50  Tensor* cpu_tensor,  [argument]
      52  *cpu_tensor = *device_tensor;
/aosp_15_r20/external/tensorflow/tensorflow/core/common_runtime/pluggable_device/
  pluggable_device_util.cc
    in CopyPluggableDeviceTensorToCPU():
      176  const Tensor* device_tensor, Tensor* cpu_tensor, StatusCallback done) {  [argument]
      180  Status s = PrepareCopy(device, device_context, *device_tensor, cpu_tensor,
      202  void* dst_ptr = GetBase(cpu_tensor);
    in CopyCPUTensorToPluggableDevice():
      222  const Tensor* cpu_tensor, const DeviceContext* device_context,  [argument]
      228  Status s = PrepareCopy(device, device_context, *cpu_tensor, device_tensor,
      247  const int64_t total_bytes = cpu_tensor->TotalBytes();
      250  void* src_ptr = GetBase(cpu_tensor);
      257  TensorReference input_ref(*cpu_tensor);
  pluggable_device_context.cc
    in CopyCPUTensorToDevice():
      28  const Tensor* cpu_tensor, Device* device, Tensor* device_tensor,  [argument]
      31  cpu_tensor, this, device, device_tensor, done, sync_dst_compute);
    in CopyDeviceTensorToCPU():
      37  Tensor* cpu_tensor,  [argument]
      40  device, this, device_tensor, cpu_tensor, done);
  pluggable_device_util.h
      38  const Tensor* device_tensor, Tensor* cpu_tensor, StatusCallback done);
      50  const Tensor* cpu_tensor, const DeviceContext* device_context,
  pluggable_device_context.h
      52  void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device,
      58  Tensor* cpu_tensor, StatusCallback done) override;
/aosp_15_r20/external/tensorflow/tensorflow/core/common_runtime/gpu/
  gpu_util.cc
    in CopyGPUTensorToCPU():
      268  const Tensor* gpu_tensor, Tensor* cpu_tensor,  [argument]
      273  Status s = PrepareCopy(gpu_device, device_context, *gpu_tensor, cpu_tensor,
      294  void* dst_ptr = GetBase(cpu_tensor);
    in CopyCPUTensorToGPU():
      311  void GPUUtil::CopyCPUTensorToGPU(const Tensor* cpu_tensor,  [argument]
      318  Status s = PrepareCopy(gpu_device, device_context, *cpu_tensor, gpu_tensor,
      337  const int64_t total_bytes = cpu_tensor->TotalBytes();
      344  TensorReference input_ref(*cpu_tensor);
      348  void* src_ptr = GetBase(cpu_tensor);
      352  if (NeedStaging(cpu_tensor)) {
  gpu_device_test.cc
    in InitCPUTensor():
      107  void InitCPUTensor(Tensor* cpu_tensor, int num_elements, float value) {  [argument]
      108  auto tensor = cpu_tensor->tensor<float, 1>();
    in CopyCPUToGPU():
      114  void CopyCPUToGPU(Tensor* cpu_tensor, Tensor* gpu_tensor, Device* device,  [argument]
      116  TF_ASSERT_OK(device_context->CopyCPUTensorToDeviceSync(cpu_tensor, device,
    in CopyGPUToCPU():
      120  void CopyGPUToCPU(Tensor* gpu_tensor, Tensor* cpu_tensor, Device* device,  [argument]
      123  gpu_tensor, /*tensor_name=*/"", device, cpu_tensor));
    in TEST_F():
      533  Tensor cpu_tensor(cpu_allocator(), DT_FLOAT, TensorShape({kNumElements}));  [local]
      536  InitCPUTensor(&cpu_tensor, kNumElements, 0);
      537  CopyCPUToGPU(&cpu_tensor, &output_tensor, device, device_context);
      538  InitCPUTensor(&cpu_tensor, kNumElements, 1);
    [all …]
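  The 536-538 pattern (fill with 0, copy, refill with 1) suggests the test verifies the device copy is a snapshot of the host buffer rather than an alias. A rough Python analog, assuming an available GPU and that an eager constant created under a device scope lands on that device:

      import numpy as np
      import tensorflow as tf

      if tf.config.list_physical_devices("GPU"):
          buf = np.zeros(64, dtype=np.float32)   # InitCPUTensor(..., 0)
          with tf.device("/GPU:0"):
              on_gpu = tf.constant(buf)          # CopyCPUToGPU
          buf.fill(1.0)                          # InitCPUTensor(..., 1)
          # the device tensor must still hold the values it was copied with
          np.testing.assert_array_equal(on_gpu.numpy(), np.zeros(64, dtype=np.float32))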
  gpu_util_platform_specific.cc
    in CopyCPUTensorToDevice():
      26  void GPUDeviceContext::CopyCPUTensorToDevice(const Tensor* cpu_tensor,  [argument]
      31  GPUUtil::CopyCPUTensorToGPU(cpu_tensor, this, device, device_tensor, done,
    in CopyDeviceTensorToCPU():
      37  Device* device, Tensor* cpu_tensor,  [argument]
      39  GPUUtil::CopyGPUTensorToCPU(device, this, device_tensor, cpu_tensor, done);
/aosp_15_r20/external/pytorch/test/
  test_cpp_extensions_open_device_registration.py
      345  cpu_tensor = torch.empty(3)
      346  self.assertFalse(cpu_tensor.is_foo)
      347  self.assertFalse(cpu_tensor.is_pinned("foo"))
      349  cpu_tensor_pin = cpu_tensor.pin_memory("foo")
      353  cpu_storage = cpu_tensor.storage()
      364  cpu_tensor = torch.randn([3, 2, 1, 4])
      365  cpu_untyped_storage = cpu_tensor.untyped_storage()
      412  cpu_tensor = torch.randn([8])
      413  foo_tensor = cpu_tensor.foo()
      426  cpu_tensor = torch.randn([8]).float()
    [all …]
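  Here "foo" is the test's stand-in name for a custom out-of-tree backend. Against the stock CUDA backend the same pinned-memory calls look like this (a sketch; needs a CUDA-enabled PyTorch):

      import torch

      if torch.cuda.is_available():
          cpu_tensor = torch.empty(3)
          assert not cpu_tensor.is_pinned()
          pinned = cpu_tensor.pin_memory()  # page-locked host copy
          assert pinned.is_pinned()
          # pinned memory makes non-blocking host-to-device copies safe
          gpu_tensor = pinned.to("cuda", non_blocking=True)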
  test_mkldnn.py
      41  for cpu_tensor in [torch.randn((1, 2, 3, 4),
      45  cpu_tensor.requires_grad_()
      51  mkldnn_tensor = cpu_tensor.to_mkldnn(dtype1)
      61  self.assertEqual(cpu_tensor, cpu_tensor_2.float(), atol=atol, rtol=0)
      65  self.assertEqual(mkldnn_tensor.numel(), cpu_tensor.numel())
      67  self.assertEqual(mkldnn_tensor.element_size(), cpu_tensor.element_size())
      69  self.assertEqual(mkldnn_tensor.element_size(), cpu_tensor.element_size() / 2)
      76  cpu_tensor_lower = cpu_tensor.to(dtype=orig_dtype)
      92  self.assertEqual(mkldnn_tensor.numel(), cpu_tensor.numel())
      106  for cpu_tensor in [torch.randint(
    [all …]
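  The halved element size asserted at line 69 falls out of converting float32 storage to a 16-bit dtype during the to_mkldnn() call. A minimal illustration (a sketch; the bfloat16 path may additionally require CPU support for that dtype):

      import torch

      if torch.backends.mkldnn.is_available():
          x = torch.randn(1, 2, 3, 4, dtype=torch.float32)
          same = x.to_mkldnn()                      # stays float32
          assert same.element_size() == x.element_size() == 4
          half_sized = x.to_mkldnn(torch.bfloat16)  # downcast during conversion
          assert half_sized.element_size() == x.element_size() / 2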
/aosp_15_r20/external/tensorflow/tensorflow/core/framework/
  device_base.cc
    in CopyDeviceTensorToCPUSync():
      40  Tensor* cpu_tensor) {  [argument]
      43  CopyDeviceTensorToCPU(device_tensor, tensor_name, device, cpu_tensor,
    in CopyCPUTensorToDeviceSync():
      52  Status DeviceContext::CopyCPUTensorToDeviceSync(const Tensor* cpu_tensor,  [argument]
      57  CopyCPUTensorToDevice(cpu_tensor, device, device_tensor,
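  Both Sync variants wrap the callback-based copy the same way: invoke the async version, then block on a notification until the StatusCallback fires. The pattern, sketched in Python with hypothetical async_copy/sync_copy stand-ins:

      import threading

      def async_copy(src, dst, done):
          # stand-in for CopyCPUTensorToDevice: the copy completes on
          # another thread, then invokes the StatusCallback
          def work():
              dst.extend(src)  # pretend DMA
              done(None)       # None plays the role of Status::OK()
          threading.Thread(target=work).start()

      def sync_copy(src, dst):
          # stand-in for CopyCPUTensorToDeviceSync
          note = threading.Event()  # plays the role of absl::Notification
          result = []
          def done(status):
              result.append(status)
              note.set()
          async_copy(src, dst, done)
          note.wait()               # WaitForNotification()
          return result[0]

      dst = []
      assert sync_copy([1, 2, 3], dst) is None and dst == [1, 2, 3]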
  device_base.h
      79  virtual void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device,
      86  Status CopyCPUTensorToDeviceSync(const Tensor* cpu_tensor, Device* device,
    in CopyDeviceTensorToCPU():
      101  Tensor* cpu_tensor, StatusCallback done) {  [argument]
      108  Tensor* cpu_tensor);
/aosp_15_r20/external/pytorch/aten/src/ATen/
  Utils.cpp
    in tensor_backend():
      34  auto cpu_tensor = tensor_cpu(values, options.device(DeviceType::CPU));  [local]
      35  return cpu_tensor.to(options.device());
    in tensor_complex_backend():
      53  auto cpu_tensor = tensor_complex_cpu(values, options.device(DeviceType::CPU));  [local]
      54  return cpu_tensor.to(options.device());
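  Both helpers use the construct-on-host-then-move pattern: materialize the values in a CPU tensor, then ship the result to the requested device in one transfer. Spelled out at the Python level (the CUDA device name is an assumption; any backend works):

      import torch

      values = [1.0, 2.0, 3.0]
      cpu_tensor = torch.tensor(values)          # host-side construction
      if torch.cuda.is_available():
          device_tensor = cpu_tensor.to("cuda")  # single device transfer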
/aosp_15_r20/external/tensorflow/tensorflow/core/common_runtime/
  copy_tensor.cc
    in ViaDMA():
      255  Tensor* cpu_tensor =  [local]
      257  auto delete_and_done = [cpu_tensor,
      259  delete cpu_tensor;
      264  cpu_tensor, cpu_allocator, out_allocator, edge_name, dst, output,
      270  CopyHostToDevice(cpu_tensor, cpu_allocator, out_allocator, edge_name,
      275  cpu_tensor, send_dev_context,
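  ViaDMA stages a device-to-device transfer through a temporary host tensor when the source and destination devices cannot exchange data directly. The same two-hop strategy can be written explicitly in user code; a PyTorch sketch (assumes two CUDA devices):

      import torch

      if torch.cuda.device_count() >= 2:
          src = torch.randn(4, device="cuda:0")
          staged = src.cpu()          # device -> host, the cpu_tensor above
          dst = staged.to("cuda:1")   # host -> device
          # (src.to("cuda:1") would use a direct peer copy when one exists)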
/aosp_15_r20/external/pytorch/torch/csrc/
  serialization.cpp
    in THPStorage_writeFileRaw():
      231  at::Tensor cpu_tensor;  [local]
      248  cpu_tensor = device_tensor.to(at::kCPU);
      249  data = (uint8_t*)cpu_tensor.data_ptr();
    in THPStorage_readFileRaw():
      386  auto cpu_tensor = at::from_blob(  [local]
      397  device_tensor.copy_(cpu_tensor);
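  This is why serializing a device tensor never reads file bytes from device memory: writes stage through a CPU copy, and reads fill a CPU buffer that is then copied onto the device. Observable from Python (a sketch; assumes a CUDA build):

      import io
      import torch

      if torch.cuda.is_available():
          t = torch.arange(4.0, device="cuda")
          buf = io.BytesIO()
          torch.save(t, buf)          # write path stages through a cpu_tensor
          buf.seek(0)
          restored = torch.load(buf)  # read path copies host bytes back to CUDA
          assert restored.device.type == "cuda"
          assert torch.equal(restored.cpu(), t.cpu())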
/aosp_15_r20/external/tensorflow/tensorflow/python/eager/
  ops_test.py
      286  cpu_tensor = constant_op.constant(1.0)
      287  gpu_tensor = cpu_tensor.gpu()
      288  self.assertAllEqual(cpu_tensor + gpu_tensor, 2.0)
      302  cpu_tensor = constant_op.constant(1.0)
      303  result = cpu_tensor + cpu_tensor
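  Tensor.gpu() is the legacy eager spelling of a host-to-device copy; the mixed add on line 288 works because eager execution copies one operand so the op runs on a single device. With current public APIs the same check reads (a sketch; assumes a visible GPU):

      import tensorflow as tf

      cpu_tensor = tf.constant(1.0)
      if tf.config.list_physical_devices("GPU"):
          with tf.device("/GPU:0"):
              gpu_tensor = tf.identity(cpu_tensor)
          assert float(cpu_tensor + gpu_tensor) == 2.0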
/aosp_15_r20/test/mlts/benchmark/tools/
  tensor_utils.py
      246  cpu_tensor = self['cpu'][layer]
      248  assert(cpu_tensor.shape == nnapi_tensor.shape)
      249  diff = cpu_tensor - nnapi_tensor
      253  cpu_tensor = cpu_tensor.astype(float)
      255  max_cpu_nnapi_tensor = np.maximum(np.abs(cpu_tensor), np.abs(nnapi_tensor))
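  The script is building a per-element relative error: the difference between the CPU and NNAPI outputs over the elementwise maximum of their magnitudes. Stand-alone, with hypothetical inputs:

      import numpy as np

      cpu_tensor = np.array([1.00, 2.00, 0.0])
      nnapi_tensor = np.array([1.01, 1.98, 0.0])
      diff = cpu_tensor - nnapi_tensor
      denom = np.maximum(np.abs(cpu_tensor), np.abs(nnapi_tensor))
      # guard all-zero elements so 0/0 does not produce NaN
      rel_err = np.abs(diff) / np.where(denom == 0, 1, denom)
      print(rel_err.max())  # worst per-element relative error, here 0.01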
/aosp_15_r20/external/tensorflow/tensorflow/core/distributed_runtime/rpc/
  grpc_worker_service.cc
    in RecvBufAsync():
      681  Tensor* cpu_tensor =  [local]
      685  hook->prod_value, "empty_name", hook->prod_dev, cpu_tensor,
      686  [hook, cpu_tensor, rendezvous_done](const Status& s) {
      687  rendezvous_done(*cpu_tensor, s);
      689  delete cpu_tensor;
/aosp_15_r20/external/tensorflow/tensorflow/core/tfrt/eager/cpp_tests/core_runtime/
  op_handler_selector_test.cc
    in TEST_F():
      401  tensorflow::core::RefCountPtr<FakeTensorHandle> cpu_tensor(  [local]
      408  TF_ASSERT_OK(op->AddInput(cpu_tensor.get()));
      430  tensorflow::core::RefCountPtr<FakeTensorHandle> cpu_tensor(  [local]
      437  TF_ASSERT_OK(op->AddInput(cpu_tensor.get()));