/aosp_15_r20/external/pytorch/aten/src/ATen/functorch/

  BatchRulesNorm.cpp
    128: const at::Tensor & grad_out, std::optional<int64_t> grad_out_bdim, in batch_norm_backward_no_weight_bias_batch_rule() argument
    146: …grad_out, input, dummy_weight, running_mean_opt, running_var_opt, mean, rstd, training, eps, {true… in batch_norm_backward_no_weight_bias_batch_rule()
    150: auto grad_out_ = moveBatchDimToFront(grad_out, grad_out_bdim); in batch_norm_backward_no_weight_bias_batch_rule()
    156: …const auto bdim_size = get_bdim_size4(grad_out, grad_out_bdim, input, input_bdim, running_mean, ru… in batch_norm_backward_no_weight_bias_batch_rule()
    200: const at::Tensor & grad_out, in batch_norm_backward_plumbing() argument
    228: auto [grad_out_value, grad_out_bdim] = unwrapTensorAtLevel(grad_out, cur_level); in batch_norm_backward_plumbing()
    256: grad_bias = grad_out.transpose(0, 1).sum(range(1, grad_out.dim())); in batch_norm_backward_plumbing()
    263: const auto expanded_grad_weight = normalized_input * grad_out.transpose(0, 1); in batch_norm_backward_plumbing()
    264: grad_weight = expanded_grad_weight.sum(range(1, grad_out.dim())); in batch_norm_backward_plumbing()
    268: …grad_out.transpose(0, 1) * padRight(weight, std::nullopt, grad_out.dim()) : grad_out.transpose(0, … in batch_norm_backward_plumbing()
    [all …]

/aosp_15_r20/external/pytorch/torch/utils/

  flop_counter.py
    167: {D, E} [grad_out]
    171: # grad_inp as conv_transpose(grad_out, weight)
    175: D in grad_out), and C is only involved in the last element of the output
    176: (and thus only depends upon E in grad_out)
    183: {D, E} [inp (grad_out)]
    188: I leave the fact that grad_inp for a transposed conv is just conv(grad_out,
    191: # grad_weight as conv(inp, grad_out)
    195: => {D, E} [grad_out]
    201: {D, E} [weight (grad_out)]
    205: # grad_weight of transposed conv as conv(grad_out, inp)
    [all …]

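The flop_counter.py comments above reason that both gradients of a convolution are themselves convolutions involving grad_out, which is why backward FLOPs can be counted as extra convolution FLOPs. A minimal sketch (not from flop_counter.py; the 1-D shapes and the torch.nn.grad helpers are just an illustration) that checks this identity:

    import torch
    import torch.nn.grad  # gradient-as-convolution helpers
    import torch.nn.functional as F

    x = torch.randn(1, 3, 16, requires_grad=True)   # (batch, in_channels, length)
    w = torch.randn(4, 3, 5, requires_grad=True)    # (out_channels, in_channels, kernel)
    out = F.conv1d(x, w)
    grad_out = torch.randn_like(out)

    # Gradients from autograd ...
    grad_x, grad_w = torch.autograd.grad(out, (x, w), grad_out)

    # ... equal the same quantities expressed as convolutions of grad_out.
    grad_x_ref = torch.nn.grad.conv1d_input(x.shape, w, grad_out)
    grad_w_ref = torch.nn.grad.conv1d_weight(x, w.shape, grad_out)
    torch.testing.assert_close(grad_x, grad_x_ref)
    torch.testing.assert_close(grad_w, grad_w_ref)
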
/aosp_15_r20/external/pytorch/test/dynamo/

  test_backward_higher_order_ops.py
    36: grad_out = torch.tensor([2.0, 2.0])
    37: out.backward(grad_out)
    38: self.assertEqual(x.grad, y * grad_out)
    52: grad_out = torch.tensor([2.0, 2.0])
    53: out.backward(grad_out)
    54: self.assertEqual(x.grad, grad_out * y)
    119: grad_out = torch.tensor([2.0, 2.0])
    121: out.backward(grad_out)
    123: self.assertEqual(x.grad, grad_out * grad_out)
    184: grad_out = torch.tensor([2.0, 2.0])
    [all …]

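A minimal sketch of the pattern exercised by the test lines above (assumed values; not the test itself): the tensor passed to backward() seeds the vector-Jacobian product, so for out = x * y the accumulated x.grad is y * grad_out.

    import torch

    x = torch.tensor([0.5, 1.5], requires_grad=True)
    y = torch.tensor([3.0, 4.0])
    out = x * y

    grad_out = torch.tensor([2.0, 2.0])   # the "vector" in the vector-Jacobian product
    out.backward(grad_out)

    assert torch.equal(x.grad, y * grad_out)   # same relation as assertEqual(x.grad, y * grad_out)
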
  test_autograd_function.py
    343: def backward(ctx, grad_out): argument
    344: return grad_out * ctx.x0
    479: def backward(ctx, grad_out): argument
    480: return grad_out * 3
    963: def backward(ctx, grad_out): argument
    964: return grad_out * ctx.x0
    1010: def backward(ctx, grad_out): argument
    1012: return grad_out * x, grad_out * y
    1046: def backward(ctx, grad_out): argument
    1047: x0mul = grad_out * ctx.x0
    [all …]

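The backward(ctx, grad_out) methods listed above all follow the same contract. A minimal custom Function sketch (assumed, mirroring the x/y product case at lines 1010–1012): backward receives grad_out and returns one gradient per forward input.

    import torch

    class Mul(torch.autograd.Function):
        @staticmethod
        def forward(ctx, x, y):
            ctx.save_for_backward(x, y)
            return x * y

        @staticmethod
        def backward(ctx, grad_out):
            # one gradient per forward input, each scaled by grad_out
            x, y = ctx.saved_tensors
            return grad_out * y, grad_out * x

    a = torch.randn(3, requires_grad=True)
    b = torch.randn(3, requires_grad=True)
    Mul.apply(a, b).sum().backward()
    torch.testing.assert_close(a.grad, b.detach())
    torch.testing.assert_close(b.grad, a.detach())
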
/aosp_15_r20/external/pytorch/aten/src/ATen/native/cpu/

  SpmmReduceKernel.cpp
    255: auto grad_out = grad_out_.contiguous(); in spmm_reduce_backward_input_kernel_impl() local
    260: const scalar_t* grad_out_data = grad_out.const_data_ptr<scalar_t>(); in spmm_reduce_backward_input_kernel_impl()
    266: int64_t K = grad_out.size(1); in spmm_reduce_backward_input_kernel_impl()
    304: auto grad_out = grad_out_.contiguous(); in spmm_reduce_backward_input_arg_kernel_impl() local
    310: const scalar_t* grad_out_data = grad_out.const_data_ptr<scalar_t>(); in spmm_reduce_backward_input_arg_kernel_impl()
    315: int64_t M = grad_out.size(0); in spmm_reduce_backward_input_arg_kernel_impl()
    316: int64_t K = grad_out.size(1); in spmm_reduce_backward_input_arg_kernel_impl()
    317: auto grad = at::empty({M, K}, grad_out.options()); in spmm_reduce_backward_input_arg_kernel_impl()
    388: auto grad_out = grad_out_.contiguous(); in spmm_reduce_backward_other_arg_kernel_impl()
    392: const scalar_t* grad_out_data = grad_out.const_data_ptr<scalar_t>(); in spmm_reduce_backward_other_arg_kernel_impl()
    [all …]

  UnfoldBackwardKernel.cpp
    18: // grad_in/grad_out is just an input/output of unfold_backward kernel.
    27: // unfold_backward receives grad_in and returns grad_out such that
    29: // grad_out.shape = in.shape.
    37: // grad_out[..., i_out_dim,...] = grad_in[..., i_in_dim,..., i_in_last_dim],
    41: // In this case the iteration takes over grad_out,
    42: // where grad_out[...,i_out_dim,...] accumulates all values
    54: // gets added up to grad_out[...,i_out_dim,...].
    112: Tensor& grad_out, in unfold_backward_cpu_kernel() argument
    118: dim = maybe_wrap_dim(dim, grad_out.dim()); in unfold_backward_cpu_kernel()
    126: auto grad_out_dim_stride = ensure_nonempty_stride(grad_out, dim); in unfold_backward_cpu_kernel()
    [all …]

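A small eager-mode sketch (assumed; not the kernel itself) of the property the comments above describe: the grad_out returned by unfold_backward has the input's shape, and every window that touched an element accumulates into it.

    import torch

    x = torch.zeros(6, requires_grad=True)
    windows = x.unfold(0, 3, 1)   # 4 overlapping windows of length 3, step 1
    windows.sum().backward()      # backward routes through unfold_backward

    # Each element's gradient counts the windows it appears in.
    assert torch.equal(x.grad, torch.tensor([1., 2., 3., 3., 2., 1.]))
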
  FlashAttentionKernel.cpp
    424: const at::Tensor& grad_out, in cpu_flash_attention_backward() argument
    493: int64_t grad_oStrideB = grad_out.stride(0); in cpu_flash_attention_backward()
    494: int64_t grad_oStrideM = grad_out.stride(1); in cpu_flash_attention_backward()
    495: int64_t grad_oStrideH = grad_out.stride(2); in cpu_flash_attention_backward()
    522: const scalar_t* grad_out_data = grad_out.const_data_ptr<scalar_t>(); in cpu_flash_attention_backward()
    549: // rowsum of grad_out * out in cpu_flash_attention_backward()
    552: // dsum <- rowsum(grad_out * out) in cpu_flash_attention_backward()
    620: // grad_v <- grad_v + attn.T @ grad_out in cpu_flash_attention_backward()
    637: // grad_attn <- grad_out @ v.T in cpu_flash_attention_backward()
    780: const at::Tensor& grad_out, in flash_attention_backward_kernel_impl() argument
    [all …]

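The grad_v / grad_attn comments above are standard matmul-backward identities. A plain-PyTorch sketch (assumed shapes; not the fused CPU kernel) checking both of them for out = attn @ v:

    import torch

    attn = torch.rand(4, 4, requires_grad=True)    # attention weights after softmax
    v = torch.randn(4, 8, requires_grad=True)
    out = attn @ v
    grad_out = torch.randn_like(out)

    grad_attn, grad_v = torch.autograd.grad(out, (attn, v), grad_out)
    torch.testing.assert_close(grad_v, attn.detach().T @ grad_out)     # grad_v <- attn.T @ grad_out
    torch.testing.assert_close(grad_attn, grad_out @ v.detach().T)     # grad_attn <- grad_out @ v.T
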
/aosp_15_r20/external/pytorch/test/functorch/

  test_control_flow.py
    306: grad_out = torch.ones_like(result)
    307: grads = torch.autograd.grad(result, (x,), grad_out)
    308: expected_grads = torch.autograd.grad(fn(x), (x,), grad_out)
    313: grad_out = torch.ones_like(result)
    314: return torch.autograd.grad(result, (x,), grad_out)
    348: grad_out = torch.ones_like(result)
    349: grads = torch.autograd.grad(result, (x,), grad_out)
    350: expected_grads = torch.autograd.grad(fn(x), (x,), grad_out)
    355: grad_out = torch.ones_like(result)
    356: return torch.autograd.grad(result, (x,), grad_out)
    [all …]

  test_ac.py
    218: def triton_relu_backward(grad_out: torch.Tensor) -> torch.Tensor:
    219: grad_x = torch.empty_like(grad_out)
    220: sz = grad_out.numel()
    225: grad_out, grad_x, sz, BLOCK_SIZE
    229: def _triton_relu_backward(ctx, grad_out: torch.Tensor) -> torch.Tensor:
    230: return triton_relu_backward(grad_out)

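The Triton kernel wrapped above computes a ReLU backward. An eager-mode sketch (assumed; equivalent math, no Triton) of what such a kernel does with grad_out:

    import torch

    x = torch.randn(10, requires_grad=True)
    out = torch.relu(x)
    grad_out = torch.randn_like(out)

    (grad_x,) = torch.autograd.grad(out, (x,), grad_out)
    # ReLU backward: pass grad_out through where the input was positive, zero elsewhere.
    torch.testing.assert_close(grad_x, grad_out * (x.detach() > 0))
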
/aosp_15_r20/external/pytorch/torch/testing/_internal/

  custom_op_db.py
    47: def numpy_cube_backward(ctx, grad_out, grad_dx): argument
    49: grad_x = numpy_mul(grad_out, dx) + 6 * numpy_mul(grad_dx, x)
    72: def numpy_mul_backward(ctx, grad_out): argument
    74: grad_x = grad_out * y if ctx.needs_input_grad[0] else None
    75: grad_y = grad_out * x if ctx.needs_input_grad[1] else None
    101: def numpy_mul_scalar_backward(ctx, grad_out): argument
    102: grad_x = grad_out * ctx.scalar
    139: def numpy_sort_backward(ctx, grad_out, grad_ind, grad_ind_inv): argument
    141: return numpy_take(grad_out, ind_inv, ind, ctx.dim), None
    174: def numpy_take_backward(ctx, grad_out): argument
    [all …]

/aosp_15_r20/external/pytorch/aten/src/ATen/native/

  UnfoldBackward.h
    30: // grad_in/grad_out is just an input/output of unfold_backward kernel.
    33: Tensor& grad_out, in _make_unfold_backward_iter_over_grad_out() argument
    39: dim = maybe_wrap_dim(dim, grad_out.dim()); in _make_unfold_backward_iter_over_grad_out()
    42: auto grad_out_dim_size = ensure_nonempty_size(grad_out, dim); in _make_unfold_backward_iter_over_grad_out()
    51: /* prepare grad_out for TensorIterator { */ in _make_unfold_backward_iter_over_grad_out()
    52: auto grad_out_strides = ensure_nonempty_vec(grad_out.strides().vec()); in _make_unfold_backward_iter_over_grad_out()
    53: auto grad_out_sizes = ensure_nonempty_vec(grad_out.sizes().vec()); in _make_unfold_backward_iter_over_grad_out()
    55: auto grad_out_restrided = grad_out.as_strided( in _make_unfold_backward_iter_over_grad_out()
    79: // i_dim in grad_out[i_1,...,i_dim,...i_n], in _make_unfold_backward_iter_over_grad_out()
    86: auto grad_out_dim = ensure_nonempty_dim(grad_out.dim()); in _make_unfold_backward_iter_over_grad_out()
    [all …]

/aosp_15_r20/external/pytorch/torch/_decomp/

  decompositions_for_jvp.py
    133: grad_out: Tensor,
    168: grad_x_hat = grad_out * weight
    170: grad_x_hat = grad_out
    186: grad_out * x_hat, outer_dim_indices, False
    189: d_weight = grad_out * x_hat
    197: d_bias: Optional[Tensor] = torch.sum(grad_out, outer_dim_indices, False)
    199: d_bias = grad_out.clone()
    217: grad_out: Tensor,
    261: grad_output_sum = torch.sum(grad_out, reduction_axes)
    262: dot_p = torch.sum(grad_out * (input - mean), reduction_axes)
    [all …]

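The decomposition lines above compute the layer-norm parameter gradients as reductions of grad_out. A sketch (assumed shapes and default eps; not the decomposition code) checking the two formulas against autograd: d_weight = sum(grad_out * x_hat) and d_bias = sum(grad_out) over the outer dims.

    import torch
    import torch.nn.functional as F

    x = torch.randn(4, 8)
    weight = torch.randn(8, requires_grad=True)
    bias = torch.randn(8, requires_grad=True)
    out = F.layer_norm(x, (8,), weight, bias)
    grad_out = torch.randn_like(out)

    d_weight, d_bias = torch.autograd.grad(out, (weight, bias), grad_out)

    # x_hat is the normalized input (biased variance, layer_norm's default eps of 1e-5).
    mean = x.mean(-1, keepdim=True)
    rstd = torch.rsqrt(x.var(-1, unbiased=False, keepdim=True) + 1e-5)
    x_hat = (x - mean) * rstd

    torch.testing.assert_close(d_weight, (grad_out * x_hat).sum(0))
    torch.testing.assert_close(d_bias, grad_out.sum(0))
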
/aosp_15_r20/external/pytorch/aten/src/ATen/native/cuda/

  Normalization.cu
    181: const Tensor& grad_out, const Tensor& input, const Tensor& mean, const Tensor& invstd, in batch_norm_elementwise_backward_train() argument
    183: switch (batch_norm_choose_impl(input, grad_out)) { in batch_norm_elementwise_backward_train()
    191: grad_out, input, mean, invstd, weight, sum_dy, sum_dy_xmu); in batch_norm_elementwise_backward_train()
    194: grad_out, input, mean, invstd, weight, sum_dy, sum_dy_xmu); in batch_norm_elementwise_backward_train()
    202: grad_out, input, mean, invstd, weight, sum_dy, sum_dy_xmu); in batch_norm_elementwise_backward_train()
    222: …Tensor grad_input = at::empty(input.sizes(), grad_out.options().memory_format(input.suggest_memory… in batch_norm_elementwise_backward_train()
    225: .add_input(grad_out) in batch_norm_elementwise_backward_train()
    236: AT_DISPATCH_FLOATING_TYPES_AND2(kHalf, kBFloat16, grad_out.scalar_type(), in batch_norm_elementwise_backward_train()
    256: const Tensor& grad_out, const Tensor& input, in batch_norm_elementwise_backward_eval() argument
    263: Tensor grad_input = at::empty(input.sizes(), grad_out.options()); in batch_norm_elementwise_backward_eval()
    [all …]

  UnfoldBackwardKernel.cu
    13: // grad_in/grad_out is just an input/output of unfold_backward kernel.
    88: // The algorithm is: for each index in grad_out find in _unfold_backward_internal_kernel()
    122: Tensor& grad_out, in unfold_backward_cuda_kernel() argument
    128: dim = maybe_wrap_dim(dim, grad_out.dim()); in unfold_backward_cuda_kernel()
    136: auto grad_out_dim_stride = ensure_nonempty_stride(grad_out, dim); in unfold_backward_cuda_kernel()
    139: grad_out, grad_in, dim, size, step); in unfold_backward_cuda_kernel()

/aosp_15_r20/external/pytorch/torch/_higher_order_ops/

  flex_attention.py
    136: grad_out: torch.Tensor,
    157: grad_out,
    616: def backward(ctx, grad_out, grad_logsumexp): argument
    655: grad_out,
    728: grad_out: torch.Tensor,
    766: grad_value = softmax_scores.to(query.dtype).transpose(-2, -1) @ grad_out
    768: grad_softmax_scores = grad_out @ value.transpose(-2, -1)
    770: sum_scores = torch.sum(out * grad_out, -1, keepdim=True)
    838: grad_out: torch.Tensor,
    855: grad_out,
    [all …]

  cond.py
    270: # def backward_true_fn(x, y, grad_out):
    271: #     return grad_out * y, grad_out * x
    273: # def backward_false_fn(x, y, grad_out):
    274: #     return grad_out, None

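A quick check (assumed; plain autograd, not the torch.cond machinery) that the hand-written branch backward in the comment above is the right gradient when the true branch computes x * y:

    import torch

    def true_fn(x, y):
        return x * y

    def backward_true_fn(x, y, grad_out):
        return grad_out * y, grad_out * x

    x = torch.randn(3, requires_grad=True)
    y = torch.randn(3, requires_grad=True)
    out = true_fn(x, y)
    grad_out = torch.randn_like(out)

    auto_gx, auto_gy = torch.autograd.grad(out, (x, y), grad_out)
    manual_gx, manual_gy = backward_true_fn(x.detach(), y.detach(), grad_out)
    torch.testing.assert_close(auto_gx, manual_gx)
    torch.testing.assert_close(auto_gy, manual_gy)
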
/aosp_15_r20/external/pytorch/test/

  test_functionalization_of_rng_ops.py
    131: def backward(ctx, grad_out): argument
    133: return grad_out * torch.rand_like(grad_out) * torch.cos(x)
    171: def backward(ctx, grad_out): argument
    173: return grad_out * torch.rand_like(grad_out) * torch.cos(x)
    183: def backward(ctx, grad_out): argument
    185: return grad_out * torch.rand_like(grad_out) * torch.rand_like(x)

/aosp_15_r20/external/pytorch/aten/src/ATen/native/transformers/cuda/

  attention_backward.cu
    62: const Tensor& grad_out, in _flash_attention_backward() argument
    82: auto contiguous_grad_out = grad_out.contiguous(); in _flash_attention_backward()
    168: const Tensor& grad_out, in _scaled_dot_product_cudnn_attention_backward_cuda() argument
    237: grad_out/*const Tensor& dO*/, in _scaled_dot_product_cudnn_attention_backward_cuda()
    312: // handle potentially non-contiguous grad_out through a copy in _efficient_attention_backward()
    313: auto grad_out = grad_out_.contiguous(); in _efficient_attention_backward() local
    314: CHECK_NOSPARSE_CONTIGUOUS_CUDA(grad_out); in _efficient_attention_backward()
    438: at::Tensor dout_t = grad_out.permute({0,2,1,3}); in _efficient_attention_backward()
    515: : (grad_out.to(at::kFloat) * out.to(at::kFloat)) in _efficient_attention_backward()
    529: p.grad_output_ptr = (const scalar_t*)grad_out.const_data_ptr(); in _efficient_attention_backward()
    [all …]

/aosp_15_r20/external/tensorflow/tensorflow/python/kernel_tests/linalg/sparse/

  csr_sparse_matrix_grad_test.py
    69: grad_out = gradients_impl.gradients([dense_mats], [mats],
    71: self.assertEqual(grad_out.dtype, dtypes.float32)
    72: self.assertEqual(grad_out.shape, dense_shape)
    73: grad_out_value = sess.run(grad_out)
    100: grad_out = gradients_impl.gradients([new_coo_tensor.values], [values],
    102: self.assertEqual(grad_out.dtype, dtypes.float32)
    103: grad_out_vals = sess.run(grad_out)

/aosp_15_r20/external/pytorch/aten/src/ATen/native/sparse/

  SparseCsrTensorMath.h
    65: const Tensor& grad_out, in check_sparse_mm_reduce_impl_inputs() argument
    72: checkLayout(c, grad_out, kStrided); in check_sparse_mm_reduce_impl_inputs()
    73: checkScalarType(c, {grad_out, "grad_out", 1}, input_scalar_type); in check_sparse_mm_reduce_impl_inputs()
    74: check_dim_size(grad_out, 2, 0, self.size(0)); in check_sparse_mm_reduce_impl_inputs()
    75: check_dim_size(grad_out, 2, 1, other.size(1)); in check_sparse_mm_reduce_impl_inputs()

/aosp_15_r20/external/pytorch/torch/distributed/tensor/_ops/

  _math_ops.py
    521: # follow the sharding of the grad_out or out depending on which has more shards
    723: # grad_out follows target if there is no reduction;
    880: # args must be: grad_out, input, normalized_shape, mean, rstd,
    917: # arg: grad_out
    920: # grad_out, rstd, and normalized input, among which rstd
    922: # placements, and grad_out's sharding is determined by the
    924: # TODO: now grad_out spec follows input spec. we may need
    925: # to change it to apply a pointwise rule over grad_out,
    967: # d_weight = sum(grad_out * (input - mean) / rstd, outer_dim, keepdim=False)
    971: # we may need to change to a pointwise rule over grad_out and
    [all …]

/aosp_15_r20/external/pytorch/test/distributed/tensor/parallel/

  test_tp_style.py
    376: grad_out = torch.ones_like(sharded_out)
    377: sharded_out.backward(grad_out)
    399: grad_out = torch.ones_like(sharded_out)
    400: sharded_out.backward(grad_out)
    417: grad_out = torch.ones_like(sharded_out)
    418: sharded_out.backward(grad_out)
    430: grad_out = torch.ones_like(sharded_out)
    431: sharded_out.backward(grad_out)

/aosp_15_r20/external/tensorflow/tensorflow/core/kernels/

  maxpooling_op.cc
    1031: Tensor* grad_out, const bool include_batch_in_index) { in launch()
    1035: auto shard = [&grad_in, &argmax, &grad_out, include_batch_in_index]( in launch()
    1038: GetTensorDim(grad_out->shape(), FORMAT_NHWC, 'N'); in launch()
    1040: grad_out->NumElements() / batch_size; in launch()
    1044: auto grad_out_flat = grad_out->flat<T>(); in launch()
    1074: GetTensorDim(grad_out->shape(), FORMAT_NHWC, 'N'); in launch()
    1075: const int64_t shard_cost = grad_out->NumElements() / batch_size; in launch()
    1141: Tensor* grad_out = nullptr; in Compute() local
    1143: {0}, 0, out_shape, &grad_out)); in Compute()
    1148: context, params, grad_in, argmax, grad_out, include_batch_in_index_); in Compute()
    [all …]

/aosp_15_r20/external/pytorch/torch/distributed/tensor/experimental/

  _attention.py
    362: grad_out: torch.Tensor,
    406: kwargs[grad_out_name] = grad_out
    459: grad_out: torch.Tensor,
    479: grad_out=grad_out,
    480: grad_out_name="grad_out",
    500: grad_out: torch.Tensor,
    518: grad_out=grad_out,

/aosp_15_r20/external/pytorch/torch/csrc/distributed/c10d/

  Functional.cpp
    429: auto grad_out = grad_out_list[0].contiguous(); in backward() local
    435: .call(grad_out, output_split_sizes, input_split_sizes, group_name); in backward()
    484: auto grad_out = grad_out_list[0]; in backward() local
    490: .call(grad_out, group_size, group_name); in backward()
    540: auto grad_out = grad_out_list[0]; in backward() local
    546: .call(grad_out, "sum", group_size, group_name); in backward()