Home
last modified time | relevance | path

Searched refs:cpu_tensor (Results 1 – 25 of 39) sorted by relevance

12

/aosp_15_r20/external/pytorch/aten/src/ATen/native/mkldnn/
H A DMKLDNNConversions.cpp51 Tensor cpu_tensor = at::empty( in mkldnn_to_dense() local
54 if (stensor.is_empty()) return cpu_tensor; in mkldnn_to_dense()
57 ? stensor.to_public(cpu_tensor.template data_ptr<float>(), in mkldnn_to_dense()
60 ? stensor.to_public(cpu_tensor.template data_ptr<BFloat16>(), in mkldnn_to_dense()
63 ? stensor.to_public(cpu_tensor.template data_ptr<Half>(), in mkldnn_to_dense()
66 ? stensor.to_public(cpu_tensor.template data_ptr<uint8_t>(), in mkldnn_to_dense()
68 : stensor.to_public(cpu_tensor.template data_ptr<int8_t>(), in mkldnn_to_dense()
73 cpu_tensor.as_strided_(dims, pub_tensor.get_strides()); in mkldnn_to_dense()
75 return cpu_tensor.contiguous().resize_(dims, c10::MemoryFormat::Contiguous); in mkldnn_to_dense()
78 Tensor dense_to_mkldnn(const Tensor& cpu_tensor, std::optional<ScalarType> dtype) { in dense_to_mkldnn() argument
[all …]
/aosp_15_r20/external/tensorflow/tensorflow/compiler/jit/
H A Dxla_device_context.cc114 void XlaDeviceContext::CopyCPUTensorToDevice(const Tensor* cpu_tensor, in CopyCPUTensorToDevice() argument
119 if (cpu_tensor->NumElements() == 0) { in CopyCPUTensorToDevice()
126 << reinterpret_cast<const void*>(cpu_tensor->tensor_data().data()) in CopyCPUTensorToDevice()
129 << " " << cpu_tensor->NumElements() << " " in CopyCPUTensorToDevice()
130 << cpu_tensor->shape().DebugString() << " " in CopyCPUTensorToDevice()
158 static_cast<const char*>(DMAHelper::base(cpu_tensor)), in CopyCPUTensorToDevice()
191 TensorReference ref(*cpu_tensor); in CopyCPUTensorToDevice()
211 Device* device, Tensor* cpu_tensor, in CopyDeviceTensorToCPU() argument
221 << reinterpret_cast<const void*>(cpu_tensor->tensor_data().data()) in CopyDeviceTensorToCPU()
223 << cpu_tensor->shape().DebugString() << " " in CopyDeviceTensorToCPU()
[all …]
/aosp_15_r20/external/tensorflow/tensorflow/core/kernels/
H A Dstack.cc257 Tensor* cpu_tensor = in ComputeAsync() local
260 &tensor, "StackPush", device, cpu_tensor, in ComputeAsync()
261 [cpu_tensor, stack, ctx, done](const Status& s) { in ComputeAsync()
265 ctx->SetStatus(stack->Push({*cpu_tensor, alloc_attrs, true})); in ComputeAsync()
268 ctx->set_output(0, *cpu_tensor); in ComputeAsync()
271 delete cpu_tensor; in ComputeAsync()
304 Tensor* cpu_tensor = &value.tensor; in ComputeAsync() local
307 new Tensor(gpu_allocator, cpu_tensor->dtype(), cpu_tensor->shape()); in ComputeAsync()
309 cpu_tensor, device, device_tensor, in ComputeAsync()
H A Ddynamic_partition_op_gpu.cu.cc276 Tensor cpu_tensor; in ComputeAsync() local
283 &cpu_tensor, alloc_attr), in ComputeAsync()
289 ->ThenMemcpy(cpu_tensor.flat<int32>().data(), wrapped, in ComputeAsync()
301 partition_ref, cpu_tensor, done]() { in ComputeAsync()
306 this->AllocateOutputs(c, &data, &partitions, &cpu_tensor, &outputs, done); in ComputeAsync()
H A Dcollective_nccl_test.cc273 Tensor cpu_tensor(dtype, shape); in InitTensor() local
274 init_f(&cpu_tensor); in InitTensor()
277 << cpu_tensor.DebugString(shape.num_elements()); in InitTensor()
279 VLOG(2) << "input tensor " << cpu_tensor.DebugString(); in InitTensor()
283 &cpu_tensor, device_, &input_)); in InitTensor()
/aosp_15_r20/external/tensorflow/tensorflow/core/distributed_runtime/
H A Dcollective_rma_distributed.cc72 Tensor* cpu_tensor) { in PopulateTensorFromExtra() argument
73 char* head = reinterpret_cast<char*>(DMAHelper::base(cpu_tensor)); in PopulateTensorFromExtra()
82 Tensor* cpu_tensor) { in PopulateTensorFromResponse() argument
89 const int64_t total_bytes = cpu_tensor->TotalBytes(); in PopulateTensorFromResponse()
100 " bytes, expected: ", cpu_tensor->TotalBytes()); in PopulateTensorFromResponse()
102 PopulateTensorFromExtra(extra, cpu_tensor); in PopulateTensorFromResponse()
127 std::unique_ptr<Tensor> cpu_tensor; in RecvFromPeer() member
161 state->cpu_tensor = in RecvFromPeer()
164 dst_tensor = state->cpu_tensor.get(); in RecvFromPeer()
/aosp_15_r20/external/tensorflow/tensorflow/core/tpu/
H A Dvirtual_device.cc27 void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device,
32 Tensor* cpu_tensor, StatusCallback done) override;
38 void VirtualDeviceContext::CopyCPUTensorToDevice(const Tensor* cpu_tensor, in CopyCPUTensorToDevice() argument
43 *device_tensor = *cpu_tensor; in CopyCPUTensorToDevice()
50 Tensor* cpu_tensor, in CopyDeviceTensorToCPU() argument
52 *cpu_tensor = *device_tensor; in CopyDeviceTensorToCPU()
/aosp_15_r20/external/tensorflow/tensorflow/core/common_runtime/pluggable_device/
H A Dpluggable_device_util.cc176 const Tensor* device_tensor, Tensor* cpu_tensor, StatusCallback done) { in CopyPluggableDeviceTensorToCPU() argument
180 Status s = PrepareCopy(device, device_context, *device_tensor, cpu_tensor, in CopyPluggableDeviceTensorToCPU()
202 void* dst_ptr = GetBase(cpu_tensor); in CopyPluggableDeviceTensorToCPU()
222 const Tensor* cpu_tensor, const DeviceContext* device_context, in CopyCPUTensorToPluggableDevice() argument
228 Status s = PrepareCopy(device, device_context, *cpu_tensor, device_tensor, in CopyCPUTensorToPluggableDevice()
247 const int64_t total_bytes = cpu_tensor->TotalBytes(); in CopyCPUTensorToPluggableDevice()
250 void* src_ptr = GetBase(cpu_tensor); in CopyCPUTensorToPluggableDevice()
257 TensorReference input_ref(*cpu_tensor); in CopyCPUTensorToPluggableDevice()
H A Dpluggable_device_context.cc28 const Tensor* cpu_tensor, Device* device, Tensor* device_tensor, in CopyCPUTensorToDevice() argument
31 cpu_tensor, this, device, device_tensor, done, sync_dst_compute); in CopyCPUTensorToDevice()
37 Tensor* cpu_tensor, in CopyDeviceTensorToCPU() argument
40 device, this, device_tensor, cpu_tensor, done); in CopyDeviceTensorToCPU()
H A Dpluggable_device_util.h38 const Tensor* device_tensor, Tensor* cpu_tensor, StatusCallback done);
50 const Tensor* cpu_tensor, const DeviceContext* device_context,
H A Dpluggable_device_context.h52 void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device,
58 Tensor* cpu_tensor, StatusCallback done) override;
/aosp_15_r20/external/tensorflow/tensorflow/core/common_runtime/gpu/
H A Dgpu_util.cc268 const Tensor* gpu_tensor, Tensor* cpu_tensor, in CopyGPUTensorToCPU() argument
273 Status s = PrepareCopy(gpu_device, device_context, *gpu_tensor, cpu_tensor, in CopyGPUTensorToCPU()
294 void* dst_ptr = GetBase(cpu_tensor); in CopyGPUTensorToCPU()
311 void GPUUtil::CopyCPUTensorToGPU(const Tensor* cpu_tensor, in CopyCPUTensorToGPU() argument
318 Status s = PrepareCopy(gpu_device, device_context, *cpu_tensor, gpu_tensor, in CopyCPUTensorToGPU()
337 const int64_t total_bytes = cpu_tensor->TotalBytes(); in CopyCPUTensorToGPU()
344 TensorReference input_ref(*cpu_tensor); in CopyCPUTensorToGPU()
348 void* src_ptr = GetBase(cpu_tensor); in CopyCPUTensorToGPU()
352 if (NeedStaging(cpu_tensor)) { in CopyCPUTensorToGPU()
H A Dgpu_device_test.cc107 void InitCPUTensor(Tensor* cpu_tensor, int num_elements, float value) { in InitCPUTensor() argument
108 auto tensor = cpu_tensor->tensor<float, 1>(); in InitCPUTensor()
114 void CopyCPUToGPU(Tensor* cpu_tensor, Tensor* gpu_tensor, Device* device, in CopyCPUToGPU() argument
116 TF_ASSERT_OK(device_context->CopyCPUTensorToDeviceSync(cpu_tensor, device, in CopyCPUToGPU()
120 void CopyGPUToCPU(Tensor* gpu_tensor, Tensor* cpu_tensor, Device* device, in CopyGPUToCPU() argument
123 gpu_tensor, /*tensor_name=*/"", device, cpu_tensor)); in CopyGPUToCPU()
533 Tensor cpu_tensor(cpu_allocator(), DT_FLOAT, TensorShape({kNumElements})); in TEST_F() local
536 InitCPUTensor(&cpu_tensor, kNumElements, 0); in TEST_F()
537 CopyCPUToGPU(&cpu_tensor, &output_tensor, device, device_context); in TEST_F()
538 InitCPUTensor(&cpu_tensor, kNumElements, 1); in TEST_F()
[all …]
H A Dgpu_util_platform_specific.cc26 void GPUDeviceContext::CopyCPUTensorToDevice(const Tensor* cpu_tensor, in CopyCPUTensorToDevice() argument
31 GPUUtil::CopyCPUTensorToGPU(cpu_tensor, this, device, device_tensor, done, in CopyCPUTensorToDevice()
37 Device* device, Tensor* cpu_tensor, in CopyDeviceTensorToCPU() argument
39 GPUUtil::CopyGPUTensorToCPU(device, this, device_tensor, cpu_tensor, done); in CopyDeviceTensorToCPU()
/aosp_15_r20/external/pytorch/test/
H A Dtest_cpp_extensions_open_device_registration.py345 cpu_tensor = torch.empty(3)
346 self.assertFalse(cpu_tensor.is_foo)
347 self.assertFalse(cpu_tensor.is_pinned("foo"))
349 cpu_tensor_pin = cpu_tensor.pin_memory("foo")
353 cpu_storage = cpu_tensor.storage()
364 cpu_tensor = torch.randn([3, 2, 1, 4])
365 cpu_untyped_storage = cpu_tensor.untyped_storage()
412 cpu_tensor = torch.randn([8])
413 foo_tensor = cpu_tensor.foo()
426 cpu_tensor = torch.randn([8]).float()
[all …]
H A Dtest_mkldnn.py41 for cpu_tensor in [torch.randn((1, 2, 3, 4),
45 cpu_tensor.requires_grad_()
51 mkldnn_tensor = cpu_tensor.to_mkldnn(dtype1)
61 self.assertEqual(cpu_tensor, cpu_tensor_2.float(), atol=atol, rtol=0)
65 self.assertEqual(mkldnn_tensor.numel(), cpu_tensor.numel())
67 self.assertEqual(mkldnn_tensor.element_size(), cpu_tensor.element_size())
69 self.assertEqual(mkldnn_tensor.element_size(), cpu_tensor.element_size() / 2)
76 cpu_tensor_lower = cpu_tensor.to(dtype=orig_dtype)
92 self.assertEqual(mkldnn_tensor.numel(), cpu_tensor.numel())
106 for cpu_tensor in [torch.randint(
[all …]
/aosp_15_r20/external/tensorflow/tensorflow/core/framework/
H A Ddevice_base.cc40 Tensor* cpu_tensor) { in CopyDeviceTensorToCPUSync() argument
43 CopyDeviceTensorToCPU(device_tensor, tensor_name, device, cpu_tensor, in CopyDeviceTensorToCPUSync()
52 Status DeviceContext::CopyCPUTensorToDeviceSync(const Tensor* cpu_tensor, in CopyCPUTensorToDeviceSync() argument
57 CopyCPUTensorToDevice(cpu_tensor, device, device_tensor, in CopyCPUTensorToDeviceSync()
H A Ddevice_base.h79 virtual void CopyCPUTensorToDevice(const Tensor* cpu_tensor, Device* device,
86 Status CopyCPUTensorToDeviceSync(const Tensor* cpu_tensor, Device* device,
101 Tensor* cpu_tensor, StatusCallback done) { in CopyDeviceTensorToCPU() argument
108 Tensor* cpu_tensor);
/aosp_15_r20/external/pytorch/aten/src/ATen/
H A DUtils.cpp34 auto cpu_tensor = tensor_cpu(values, options.device(DeviceType::CPU)); in tensor_backend() local
35 return cpu_tensor.to(options.device()); in tensor_backend()
53 auto cpu_tensor = tensor_complex_cpu(values, options.device(DeviceType::CPU)); in tensor_complex_backend() local
54 return cpu_tensor.to(options.device()); in tensor_complex_backend()
/aosp_15_r20/external/tensorflow/tensorflow/core/common_runtime/
H A Dcopy_tensor.cc255 Tensor* cpu_tensor = in ViaDMA() local
257 auto delete_and_done = [cpu_tensor, in ViaDMA()
259 delete cpu_tensor; in ViaDMA()
264 cpu_tensor, cpu_allocator, out_allocator, edge_name, dst, output, in ViaDMA()
270 CopyHostToDevice(cpu_tensor, cpu_allocator, out_allocator, edge_name, in ViaDMA()
275 cpu_tensor, send_dev_context, in ViaDMA()
/aosp_15_r20/external/pytorch/torch/csrc/
H A Dserialization.cpp231 at::Tensor cpu_tensor; in THPStorage_writeFileRaw() local
248 cpu_tensor = device_tensor.to(at::kCPU); in THPStorage_writeFileRaw()
249 data = (uint8_t*)cpu_tensor.data_ptr(); in THPStorage_writeFileRaw()
386 auto cpu_tensor = at::from_blob( in THPStorage_readFileRaw() local
397 device_tensor.copy_(cpu_tensor); in THPStorage_readFileRaw()
/aosp_15_r20/external/tensorflow/tensorflow/python/eager/
H A Dops_test.py286 cpu_tensor = constant_op.constant(1.0)
287 gpu_tensor = cpu_tensor.gpu()
288 self.assertAllEqual(cpu_tensor + gpu_tensor, 2.0)
302 cpu_tensor = constant_op.constant(1.0)
303 result = cpu_tensor + cpu_tensor
/aosp_15_r20/test/mlts/benchmark/tools/
Dtensor_utils.py246 cpu_tensor = self['cpu'][layer]
248 assert(cpu_tensor.shape == nnapi_tensor.shape)
249 diff = cpu_tensor - nnapi_tensor
253 cpu_tensor = cpu_tensor.astype(float)
255 max_cpu_nnapi_tensor = np.maximum(np.abs(cpu_tensor), np.abs(nnapi_tensor))
/aosp_15_r20/external/tensorflow/tensorflow/core/distributed_runtime/rpc/
H A Dgrpc_worker_service.cc681 Tensor* cpu_tensor = in RecvBufAsync() local
685 hook->prod_value, "empty_name", hook->prod_dev, cpu_tensor, in RecvBufAsync()
686 [hook, cpu_tensor, rendezvous_done](const Status& s) { in RecvBufAsync()
687 rendezvous_done(*cpu_tensor, s); in RecvBufAsync()
689 delete cpu_tensor; in RecvBufAsync()
/aosp_15_r20/external/tensorflow/tensorflow/core/tfrt/eager/cpp_tests/core_runtime/
H A Dop_handler_selector_test.cc401 tensorflow::core::RefCountPtr<FakeTensorHandle> cpu_tensor( in TEST_F() local
408 TF_ASSERT_OK(op->AddInput(cpu_tensor.get())); in TEST_F()
430 tensorflow::core::RefCountPtr<FakeTensorHandle> cpu_tensor( in TEST_F() local
437 TF_ASSERT_OK(op->AddInput(cpu_tensor.get())); in TEST_F()

12