
Searched full:grad_out (Results 1 – 25 of 81) sorted by relevance


/aosp_15_r20/external/pytorch/aten/src/ATen/functorch/
BatchRulesNorm.cpp
128 const at::Tensor & grad_out, std::optional<int64_t> grad_out_bdim, in batch_norm_backward_no_weight_bias_batch_rule() argument
146 grad_out, input, dummy_weight, running_mean_opt, running_var_opt, mean, rstd, training, eps, {true… in batch_norm_backward_no_weight_bias_batch_rule()
150 auto grad_out_ = moveBatchDimToFront(grad_out, grad_out_bdim); in batch_norm_backward_no_weight_bias_batch_rule()
156 …const auto bdim_size = get_bdim_size4(grad_out, grad_out_bdim, input, input_bdim, running_mean, ru… in batch_norm_backward_no_weight_bias_batch_rule()
200 const at::Tensor & grad_out, in batch_norm_backward_plumbing() argument
228 auto [grad_out_value, grad_out_bdim] = unwrapTensorAtLevel(grad_out, cur_level); in batch_norm_backward_plumbing()
256 grad_bias = grad_out.transpose(0, 1).sum(range(1, grad_out.dim())); in batch_norm_backward_plumbing()
263 const auto expanded_grad_weight = normalized_input * grad_out.transpose(0, 1); in batch_norm_backward_plumbing()
264 grad_weight = expanded_grad_weight.sum(range(1, grad_out.dim())); in batch_norm_backward_plumbing()
268 grad_out.transpose(0, 1) * padRight(weight, std::nullopt, grad_out.dim()) : grad_out.transpose(0, … in batch_norm_backward_plumbing()
[all …]
/aosp_15_r20/external/pytorch/torch/utils/
flop_counter.py
167 {D, E} [grad_out]
171 # grad_inp as conv_transpose(grad_out, weight)
175 D in grad_out), and C is only involved in the last element of the output
176 (and thus only depends upon E in grad_out)
183 {D, E} [inp (grad_out)]
188 I leave the fact that grad_inp for a transposed conv is just conv(grad_out,
191 # grad_weight as conv(inp, grad_out)
195 => {D, E} [grad_out]
201 {D, E} [weight (grad_out)]
205 # grad_weight of transposed conv as conv(grad_out, inp)
[all …]
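The flop_counter.py comments above lean on the identity that the input gradient of a transposed convolution is a plain convolution of grad_out with the same weight. A minimal sketch checking that identity with autograd (stride 1, no padding; the shapes are illustrative assumptions, not taken from the file):

import torch
import torch.nn.functional as F

x = torch.randn(1, 3, 8, requires_grad=True)   # (N, C_in, L)
w = torch.randn(3, 5, 4)                       # conv_transpose1d weight: (C_in, C_out, k)

out = F.conv_transpose1d(x, w)                 # (N, C_out, L + k - 1)
grad_out = torch.randn_like(out)
out.backward(grad_out)

# grad_inp of the transposed conv as conv(grad_out, weight); with stride 1 and
# no padding the same weight tensor works directly, since conv1d expects a
# weight of shape (out_channels, in_channels, k) == (C_in, C_out, k) here.
manual = F.conv1d(grad_out, w)
torch.testing.assert_close(x.grad, manual)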
/aosp_15_r20/external/pytorch/test/dynamo/
test_backward_higher_order_ops.py
36 grad_out = torch.tensor([2.0, 2.0])
37 out.backward(grad_out)
38 self.assertEqual(x.grad, y * grad_out)
52 grad_out = torch.tensor([2.0, 2.0])
53 out.backward(grad_out)
54 self.assertEqual(x.grad, grad_out * y)
119 grad_out = torch.tensor([2.0, 2.0])
121 out.backward(grad_out)
123 self.assertEqual(x.grad, grad_out * grad_out)
184 grad_out = torch.tensor([2.0, 2.0])
[all …]
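These tests seed backward() with a non-unit grad_out and then check the chain rule by hand. A condensed sketch of the same pattern (tensor values are illustrative):

import torch

x = torch.tensor([0.5, 1.5], requires_grad=True)
y = torch.tensor([3.0, 4.0])
out = x * y
grad_out = torch.tensor([2.0, 2.0])     # explicit upstream gradient
out.backward(grad_out)                  # instead of out.sum().backward()
assert torch.equal(x.grad, grad_out * y)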
test_autograd_function.py
343 def backward(ctx, grad_out): argument
344 return grad_out * ctx.x0
479 def backward(ctx, grad_out): argument
480 return grad_out * 3
963 def backward(ctx, grad_out): argument
964 return grad_out * ctx.x0
1010 def backward(ctx, grad_out): argument
1012 return grad_out * x, grad_out * y
1046 def backward(ctx, grad_out): argument
1047 x0mul = grad_out * ctx.x0
[all …]
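The backward methods above all follow the same shape: scale the incoming grad_out by state stashed on ctx and return one gradient per forward input. A self-contained sketch of that pattern (the Scale class is hypothetical, not one of the tested functions):

import torch

class Scale(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, factor):
        ctx.factor = factor                    # non-tensor state kept on ctx
        return x * factor

    @staticmethod
    def backward(ctx, grad_out):
        # One return value per forward input; the non-tensor arg gets None.
        return grad_out * ctx.factor, None

x = torch.tensor([1.0, 2.0], requires_grad=True)
out = Scale.apply(x, 3.0)
grad_out = torch.tensor([2.0, 2.0])
out.backward(grad_out)
assert torch.equal(x.grad, grad_out * 3.0)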
/aosp_15_r20/external/pytorch/aten/src/ATen/native/cpu/
SpmmReduceKernel.cpp
255 auto grad_out = grad_out_.contiguous(); in spmm_reduce_backward_input_kernel_impl() local
260 const scalar_t* grad_out_data = grad_out.const_data_ptr<scalar_t>(); in spmm_reduce_backward_input_kernel_impl()
266 int64_t K = grad_out.size(1); in spmm_reduce_backward_input_kernel_impl()
304 auto grad_out = grad_out_.contiguous(); in spmm_reduce_backward_input_arg_kernel_impl() local
310 const scalar_t* grad_out_data = grad_out.const_data_ptr<scalar_t>(); in spmm_reduce_backward_input_arg_kernel_impl()
315 int64_t M = grad_out.size(0); in spmm_reduce_backward_input_arg_kernel_impl()
316 int64_t K = grad_out.size(1); in spmm_reduce_backward_input_arg_kernel_impl()
317 auto grad = at::empty({M, K}, grad_out.options()); in spmm_reduce_backward_input_arg_kernel_impl()
388 auto grad_out = grad_out_.contiguous(); in spmm_reduce_backward_other_arg_kernel_impl() local
392 const scalar_t* grad_out_data = grad_out.const_data_ptr<scalar_t>(); in spmm_reduce_backward_other_arg_kernel_impl()
[all …]
UnfoldBackwardKernel.cpp
18 // grad_in/grad_out is just an input/output of unfold_backward kernel.
27 // unfold_backward receives grad_in and returns grad_out such that
29 // grad_out.shape = in.shape.
37 // grad_out[..., i_out_dim,...] = grad_in[..., i_in_dim,..., i_in_last_dim],
41 // In this case the iteration takes over grad_out,
42 // where grad_out[...,i_out_dim,...] accumulates all values
54 // gets added up to grad_out[...,i_out_dim,...].
112 Tensor& grad_out, in unfold_backward_cpu_kernel() argument
118 dim = maybe_wrap_dim(dim, grad_out.dim()); in unfold_backward_cpu_kernel()
126 auto grad_out_dim_stride = ensure_nonempty_stride(grad_out, dim); in unfold_backward_cpu_kernel()
[all …]
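In the kernel's naming, grad_out is the gradient with respect to the original input of unfold, so it has the input's shape and overlapping windows accumulate into it. A small Python sketch of that behaviour (sizes are illustrative):

import torch

x = torch.arange(6.0, requires_grad=True)
windows = x.unfold(0, 3, 1)              # 4 overlapping windows of size 3, step 1
windows.backward(torch.ones_like(windows))
# grad_out has in.shape, and each element accumulates one contribution per
# window it appears in: tensor([1., 2., 3., 3., 2., 1.])
print(x.grad)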
FlashAttentionKernel.cpp
424 const at::Tensor& grad_out, in cpu_flash_attention_backward() argument
493 int64_t grad_oStrideB = grad_out.stride(0); in cpu_flash_attention_backward()
494 int64_t grad_oStrideM = grad_out.stride(1); in cpu_flash_attention_backward()
495 int64_t grad_oStrideH = grad_out.stride(2); in cpu_flash_attention_backward()
522 const scalar_t* grad_out_data = grad_out.const_data_ptr<scalar_t>(); in cpu_flash_attention_backward()
549 // rowsum of grad_out * out in cpu_flash_attention_backward()
552 // dsum <- rowsum(grad_out * out) in cpu_flash_attention_backward()
620 // grad_v <- grad_v + attn.T @ grad_out in cpu_flash_attention_backward()
637 // grad_attn <- grad_out @ v.T in cpu_flash_attention_backward()
780 const at::Tensor& grad_out, in flash_attention_backward_kernel_impl() argument
[all …]
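The comments sketch the backward recurrences of attention: dsum is the rowsum of grad_out * out, grad_v accumulates attn.T @ grad_out, and grad_attn is grad_out @ v.T. The last two identities can be checked against autograd on a plain (non-flash, unbatched) attention; the sizes below are illustrative assumptions:

import torch

q, k, v = (torch.randn(4, 8, requires_grad=True) for _ in range(3))
attn = torch.softmax(q @ k.T / 8 ** 0.5, dim=-1)
attn.retain_grad()                       # keep the grad of the intermediate
out = attn @ v
grad_out = torch.randn_like(out)
out.backward(grad_out)

torch.testing.assert_close(v.grad, attn.T @ grad_out)    # grad_v
torch.testing.assert_close(attn.grad, grad_out @ v.T)    # grad_attn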
/aosp_15_r20/external/pytorch/test/functorch/
test_control_flow.py
306 grad_out = torch.ones_like(result)
307 grads = torch.autograd.grad(result, (x,), grad_out)
308 expected_grads = torch.autograd.grad(fn(x), (x,), grad_out)
313 grad_out = torch.ones_like(result)
314 return torch.autograd.grad(result, (x,), grad_out)
348 grad_out = torch.ones_like(result)
349 grads = torch.autograd.grad(result, (x,), grad_out)
350 expected_grads = torch.autograd.grad(fn(x), (x,), grad_out)
355 grad_out = torch.ones_like(result)
356 return torch.autograd.grad(result, (x,), grad_out)
[all …]
test_ac.py
218 def triton_relu_backward(grad_out: torch.Tensor) -> torch.Tensor:
219 grad_x = torch.empty_like(grad_out)
220 sz = grad_out.numel()
225 grad_out, grad_x, sz, BLOCK_SIZE
229 def _triton_relu_backward(ctx, grad_out: torch.Tensor) -> torch.Tensor:
230 return triton_relu_backward(grad_out)
/aosp_15_r20/external/pytorch/torch/testing/_internal/
custom_op_db.py
47 def numpy_cube_backward(ctx, grad_out, grad_dx): argument
49 grad_x = numpy_mul(grad_out, dx) + 6 * numpy_mul(grad_dx, x)
72 def numpy_mul_backward(ctx, grad_out): argument
74 grad_x = grad_out * y if ctx.needs_input_grad[0] else None
75 grad_y = grad_out * x if ctx.needs_input_grad[1] else None
101 def numpy_mul_scalar_backward(ctx, grad_out): argument
102 grad_x = grad_out * ctx.scalar
139 def numpy_sort_backward(ctx, grad_out, grad_ind, grad_ind_inv): argument
141 return numpy_take(grad_out, ind_inv, ind, ctx.dim), None
174 def numpy_take_backward(ctx, grad_out): argument
[all …]
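numpy_mul_backward above guards each gradient with ctx.needs_input_grad so work is skipped for inputs that do not require grad. A minimal pure-PyTorch sketch of that guard (the Mul class is hypothetical):

import torch

class Mul(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, y):
        ctx.save_for_backward(x, y)
        return x * y

    @staticmethod
    def backward(ctx, grad_out):
        x, y = ctx.saved_tensors
        grad_x = grad_out * y if ctx.needs_input_grad[0] else None
        grad_y = grad_out * x if ctx.needs_input_grad[1] else None
        return grad_x, grad_y

x = torch.randn(3, requires_grad=True)
y = torch.randn(3)                        # needs_input_grad[1] is False
Mul.apply(x, y).sum().backward()
torch.testing.assert_close(x.grad, y)     # upstream grad is all ones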
/aosp_15_r20/external/pytorch/aten/src/ATen/native/
UnfoldBackward.h
30 // grad_in/grad_out is just an input/output of unfold_backward kernel.
33 Tensor& grad_out, in _make_unfold_backward_iter_over_grad_out() argument
39 dim = maybe_wrap_dim(dim, grad_out.dim()); in _make_unfold_backward_iter_over_grad_out()
42 auto grad_out_dim_size = ensure_nonempty_size(grad_out, dim); in _make_unfold_backward_iter_over_grad_out()
51 /* prepare grad_out for TensorIterator { */ in _make_unfold_backward_iter_over_grad_out()
52 auto grad_out_strides = ensure_nonempty_vec(grad_out.strides().vec()); in _make_unfold_backward_iter_over_grad_out()
53 auto grad_out_sizes = ensure_nonempty_vec(grad_out.sizes().vec()); in _make_unfold_backward_iter_over_grad_out()
55 auto grad_out_restrided = grad_out.as_strided( in _make_unfold_backward_iter_over_grad_out()
79 // i_dim in grad_out[i_1,...,i_dim,...i_n], in _make_unfold_backward_iter_over_grad_out()
86 auto grad_out_dim = ensure_nonempty_dim(grad_out.dim()); in _make_unfold_backward_iter_over_grad_out()
[all …]
/aosp_15_r20/external/pytorch/torch/_decomp/
decompositions_for_jvp.py
133 grad_out: Tensor,
168 grad_x_hat = grad_out * weight
170 grad_x_hat = grad_out
186 grad_out * x_hat, outer_dim_indices, False
189 d_weight = grad_out * x_hat
197 d_bias: Optional[Tensor] = torch.sum(grad_out, outer_dim_indices, False)
199 d_bias = grad_out.clone()
217 grad_out: Tensor,
261 grad_output_sum = torch.sum(grad_out, reduction_axes)
262 dot_p = torch.sum(grad_out * (input - mean), reduction_axes)
[all …]
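The decomposition above reduces the weight and bias gradients of layer_norm to sums of grad_out * x_hat and grad_out over the outer dimensions. A quick numerical check (shapes and eps handling are assumptions for illustration):

import torch
import torch.nn.functional as F

x = torch.randn(5, 7)
w = torch.randn(7, requires_grad=True)
b = torch.randn(7, requires_grad=True)
out = F.layer_norm(x, (7,), w, b)
grad_out = torch.randn_like(out)
out.backward(grad_out)

# x_hat is the normalized input (biased variance, default eps of 1e-5).
x_hat = (x - x.mean(-1, keepdim=True)) / torch.sqrt(x.var(-1, unbiased=False, keepdim=True) + 1e-5)
torch.testing.assert_close(b.grad, grad_out.sum(0))            # d_bias
torch.testing.assert_close(w.grad, (grad_out * x_hat).sum(0))  # d_weight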
/aosp_15_r20/external/pytorch/aten/src/ATen/native/cuda/
Normalization.cu
181 const Tensor& grad_out, const Tensor& input, const Tensor& mean, const Tensor& invstd, in batch_norm_elementwise_backward_train() argument
183 switch (batch_norm_choose_impl(input, grad_out)) { in batch_norm_elementwise_backward_train()
191 grad_out, input, mean, invstd, weight, sum_dy, sum_dy_xmu); in batch_norm_elementwise_backward_train()
194 grad_out, input, mean, invstd, weight, sum_dy, sum_dy_xmu); in batch_norm_elementwise_backward_train()
202 grad_out, input, mean, invstd, weight, sum_dy, sum_dy_xmu); in batch_norm_elementwise_backward_train()
222 …Tensor grad_input = at::empty(input.sizes(), grad_out.options().memory_format(input.suggest_memory… in batch_norm_elementwise_backward_train()
225 .add_input(grad_out) in batch_norm_elementwise_backward_train()
236 AT_DISPATCH_FLOATING_TYPES_AND2(kHalf, kBFloat16, grad_out.scalar_type(), in batch_norm_elementwise_backward_train()
256 const Tensor& grad_out, const Tensor& input, in batch_norm_elementwise_backward_eval() argument
263 Tensor grad_input = at::empty(input.sizes(), grad_out.options()); in batch_norm_elementwise_backward_eval()
[all …]
UnfoldBackwardKernel.cu
13 // grad_in/grad_out is just an input/output of unfold_backward kernel.
88 // The algorithm is: for each index in grad_out find in _unfold_backward_internal_kernel()
122 Tensor& grad_out, in unfold_backward_cuda_kernel() argument
128 dim = maybe_wrap_dim(dim, grad_out.dim()); in unfold_backward_cuda_kernel()
136 auto grad_out_dim_stride = ensure_nonempty_stride(grad_out, dim); in unfold_backward_cuda_kernel()
139 grad_out, grad_in, dim, size, step); in unfold_backward_cuda_kernel()
/aosp_15_r20/external/pytorch/torch/_higher_order_ops/
flex_attention.py
136 grad_out: torch.Tensor,
157 grad_out,
616 def backward(ctx, grad_out, grad_logsumexp): argument
655 grad_out,
728 grad_out: torch.Tensor,
766 grad_value = softmax_scores.to(query.dtype).transpose(-2, -1) @ grad_out
768 grad_softmax_scores = grad_out @ value.transpose(-2, -1)
770 sum_scores = torch.sum(out * grad_out, -1, keepdim=True)
838 grad_out: torch.Tensor,
855 grad_out,
[all …]
cond.py
270 # def backward_true_fn(x, y, grad_out):
271 # return grad_out * y, grad_out * x
273 # def backward_false_fn(x, y, grad_out):
274 # return grad_out, None
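The commented-out branch backwards above are just the product rule for the true branch x * y: its backward returns (grad_out * y, grad_out * x). A short check of that identity with autograd.grad (the branch functions are written out here only for illustration):

import torch

def true_fn(x, y):
    return x * y

def backward_true_fn(x, y, grad_out):
    return grad_out * y, grad_out * x

x = torch.randn(3, requires_grad=True)
y = torch.randn(3, requires_grad=True)
out = true_fn(x, y)
grad_out = torch.randn_like(out)
grads = torch.autograd.grad(out, (x, y), grad_out)
torch.testing.assert_close(grads, backward_true_fn(x, y, grad_out))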
/aosp_15_r20/external/pytorch/test/
test_functionalization_of_rng_ops.py
131 def backward(ctx, grad_out): argument
133 return grad_out * torch.rand_like(grad_out) * torch.cos(x)
171 def backward(ctx, grad_out): argument
173 return grad_out * torch.rand_like(grad_out) * torch.cos(x)
183 def backward(ctx, grad_out): argument
185 return grad_out * torch.rand_like(grad_out) * torch.rand_like(x)
/aosp_15_r20/external/pytorch/aten/src/ATen/native/transformers/cuda/
attention_backward.cu
62 const Tensor& grad_out, in _flash_attention_backward() argument
82 auto contiguous_grad_out = grad_out.contiguous(); in _flash_attention_backward()
168 const Tensor& grad_out, in _scaled_dot_product_cudnn_attention_backward_cuda() argument
237 grad_out/*const Tensor& dO*/, in _scaled_dot_product_cudnn_attention_backward_cuda()
312 // handle potentially non-contiguous grad_out through a copy in _efficient_attention_backward()
313 auto grad_out = grad_out_.contiguous(); in _efficient_attention_backward() local
314 CHECK_NOSPARSE_CONTIGUOUS_CUDA(grad_out); in _efficient_attention_backward()
438 at::Tensor dout_t = grad_out.permute({0,2,1,3}); in _efficient_attention_backward()
515 : (grad_out.to(at::kFloat) * out.to(at::kFloat)) in _efficient_attention_backward()
529 p.grad_output_ptr = (const scalar_t*)grad_out.const_data_ptr(); in _efficient_attention_backward()
[all …]
/aosp_15_r20/external/tensorflow/tensorflow/python/kernel_tests/linalg/sparse/
csr_sparse_matrix_grad_test.py
69 grad_out = gradients_impl.gradients([dense_mats], [mats],
71 self.assertEqual(grad_out.dtype, dtypes.float32)
72 self.assertEqual(grad_out.shape, dense_shape)
73 grad_out_value = sess.run(grad_out)
100 grad_out = gradients_impl.gradients([new_coo_tensor.values], [values],
102 self.assertEqual(grad_out.dtype, dtypes.float32)
103 grad_out_vals = sess.run(grad_out)
/aosp_15_r20/external/pytorch/aten/src/ATen/native/sparse/
SparseCsrTensorMath.h
65 const Tensor& grad_out, in check_sparse_mm_reduce_impl_inputs() argument
72 checkLayout(c, grad_out, kStrided); in check_sparse_mm_reduce_impl_inputs()
73 checkScalarType(c, {grad_out, "grad_out", 1}, input_scalar_type); in check_sparse_mm_reduce_impl_inputs()
74 check_dim_size(grad_out, 2, 0, self.size(0)); in check_sparse_mm_reduce_impl_inputs()
75 check_dim_size(grad_out, 2, 1, other.size(1)); in check_sparse_mm_reduce_impl_inputs()
/aosp_15_r20/external/pytorch/torch/distributed/tensor/_ops/
_math_ops.py
521 # follow the sharding of the grad_out or out depending on which has more shards
723 # grad_out follows target if there is no reduction;
880 # args must be: grad_out, input, normalized_shape, mean, rstd,
917 # arg: grad_out
920 # grad_out, rstd, and normalized input, among which rstd
922 # placements, and grad_out's sharding is determined by the
924 # TODO: now grad_out spec follows input spec. we may need
925 # to change it to apply a pointwise rule over grad_out,
967 # d_weight = sum(grad_out * (input - mean) / rstd, outer_dim, keepdim=False)
971 # we may need to change to a pointwise rule over grad_out and
[all …]
/aosp_15_r20/external/pytorch/test/distributed/tensor/parallel/
test_tp_style.py
376 grad_out = torch.ones_like(sharded_out)
377 sharded_out.backward(grad_out)
399 grad_out = torch.ones_like(sharded_out)
400 sharded_out.backward(grad_out)
417 grad_out = torch.ones_like(sharded_out)
418 sharded_out.backward(grad_out)
430 grad_out = torch.ones_like(sharded_out)
431 sharded_out.backward(grad_out)
/aosp_15_r20/external/tensorflow/tensorflow/core/kernels/
maxpooling_op.cc
1031 Tensor* grad_out, const bool include_batch_in_index) { in launch()
1035 auto shard = [&grad_in, &argmax, &grad_out, include_batch_in_index]( in launch()
1038 GetTensorDim(grad_out->shape(), FORMAT_NHWC, 'N'); in launch()
1040 grad_out->NumElements() / batch_size; in launch()
1044 auto grad_out_flat = grad_out->flat<T>(); in launch()
1074 GetTensorDim(grad_out->shape(), FORMAT_NHWC, 'N'); in launch()
1075 const int64_t shard_cost = grad_out->NumElements() / batch_size; in launch()
1141 Tensor* grad_out = nullptr; in Compute() local
1143 {0}, 0, out_shape, &grad_out)); in Compute()
1148 context, params, grad_in, argmax, grad_out, include_batch_in_index_); in Compute()
[all …]
/aosp_15_r20/external/pytorch/torch/distributed/tensor/experimental/
_attention.py
362 grad_out: torch.Tensor,
406 kwargs[grad_out_name] = grad_out
459 grad_out: torch.Tensor,
479 grad_out=grad_out,
480 grad_out_name="grad_out",
500 grad_out: torch.Tensor,
518 grad_out=grad_out,
/aosp_15_r20/external/pytorch/torch/csrc/distributed/c10d/
Functional.cpp
429 auto grad_out = grad_out_list[0].contiguous(); in backward() local
435 .call(grad_out, output_split_sizes, input_split_sizes, group_name); in backward()
484 auto grad_out = grad_out_list[0]; in backward() local
490 .call(grad_out, group_size, group_name); in backward()
540 auto grad_out = grad_out_list[0]; in backward() local
546 .call(grad_out, "sum", group_size, group_name); in backward()
