Lines matching whole-word `grad` in tools/autograd/derivatives.yaml
6 # and `grad == grads[0]`, in all the derivative formulas in this file.
35 # uses `grad` instead of `grads[idx]`, then all but the first output will
63 # - 'grad', the gradient of the output (often spelled grad_output
77 # 'grad' (this case is special-cased in our code generation).
83 # permitted to mix the use of "grad", "grads", and
99 # grad, and `grad_input_mask[1]` is true if `input1` requires grad.
104 # grad. If we want to support more fine-grained signalling,
140 # called 'grad' (even though it really is a grad-grad).
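# A minimal sketch of the naming rules above, using a hypothetical
# two-output op "my_op" (not an actual entry in this file): `grads[i]`
# selects the gradient of the i-th output, a single-output formula may
# simply write `grad`, and a combined formula covering several inputs
# receives a `grad_input_mask`:
#
#   - name: my_op(Tensor self, Tensor other) -> (Tensor, Tensor)
#     self: grads[0] * other.conj()
#     other: grads[1] * self.conj()
#
# Combined-formula pattern (as the atan2 entry below uses):
#   self, other: atan2_backward(grad, self, other, grad_input_mask)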
147 # value of the argument "foo_p", its forward grad "foo_t" and the result of the
156 # will always have their forward grad formula called. This function is responsible
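# A sketch of a forward-grad formula in the "foo_p"/"foo_t" convention
# above (primal value and forward tangent of argument "foo"). For a
# product y = self * other, the product rule gives:
#
#   result: other_t * self_p + self_t * other_p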
178 # gradient Tensors, where `grads[i].defined() == false`. Depending on the
180 # generation may automatically add some level of undefined grad support,
183 # * 1 input grad and 1 output grad:
184 # Complete undefined grad support is automatically added, so you
188 # * 1 input grad and multiple output grads:
189 # Undefined grad support is automatically added ONLY in the case where
196 # If your derivative formula uses more than one output grad, it is usually
197 # preferable to add undefined grad support in the backward function itself
204 # Therefore, your backward function should treat an undefined output grad as
215 # an undefined Tensor is correct for a given input grad, it is also logically
216 # correct to return a defined grad full of zeros, but that would not be
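# When undefined grad support has to live in the formula itself, entries
# guard on `grad.defined()` and fall back to undefined outputs, as the
# conv_tbc and grid_sampler entries below do:
#
#   self, weight, bias: "grad.defined()
#       ? conv_tbc_backward(grad, self, weight, bias, pad)
#       : std::tuple<Tensor, Tensor, Tensor>()"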
222 self: grad * self.sgn()
226 self: grad * -((-self * self + 1).rsqrt()).conj()
230 self: handle_r_to_c(self.scalar_type(), grad)
231 other: handle_r_to_c(other.scalar_type(), maybe_multiply(grad, alpha.conj()))
235 self: handle_r_to_c(self.scalar_type(), grad)
239 self: maybe_multiply(grad, beta.conj())
240 …batch1: maybe_multiply(grad.unsqueeze(0).expand_symint({ batch1.sym_size(0), batch1.sym_size(1), b…
241 …batch2: maybe_multiply(batch1.transpose(1, 2).conj().bmm(grad.unsqueeze(0).expand_symint({ batch1.…
245 self: handle_r_to_c(self.scalar_type(), grad)
246 tensor1: handle_r_to_c(tensor1.scalar_type(), grad * (value / tensor2).conj())
247 …tensor2: handle_r_to_c(tensor2.scalar_type(), -grad * (value * tensor1 / (tensor2 * tensor2)).conj…
251 self: handle_r_to_c(self.scalar_type(), grad)
252 tensor1: handle_r_to_c(tensor1.scalar_type(), grad * (tensor2 * value).conj())
253 tensor2: handle_r_to_c(tensor2.scalar_type(), grad * (tensor1 * value).conj())
257 self: maybe_multiply(grad, beta.conj())
258 mat1: mm_mat1_backward(grad, mat2, mat1.sym_sizes(), mat1.sym_strides(), mat1.layout(), alpha)
259 mat2: mm_mat2_backward(grad, mat1, mat2.sym_sizes(), mat2.sym_strides(), mat2.layout(), alpha)
263 self: maybe_multiply(grad, beta)
264 mat1: mm_mat1_sparse_backward(grad, mat1, mat2, alpha)
265 mat2: mm_mat2_backward(grad, mat1, mat2.sym_sizes(), mat2.sym_strides(), mat2.layout(), alpha)
268 self: maybe_multiply(grad, beta.conj())
269 mat: maybe_multiply(grad.ger(vec.conj()), alpha.conj())
270 vec: maybe_multiply(mat.t().conj().mv(grad), alpha.conj())
274 self: maybe_multiply(grad, beta.conj())
275 vec1: maybe_multiply(grad.mv(vec2.conj()), alpha.conj())
276 vec2: maybe_multiply(grad.t().mv(vec1.conj()), alpha.conj())
280 theta: affine_grid_generator_backward_symint(grad, size, align_corners)
283 self: grad
287 self: angle_backward(grad, self)
318 …self: "self.is_complex() ? grad * ((self + 1).rsqrt() * (self - 1).rsqrt()).conj() : grad * (self …
325 self: grad * (self.pow(2) + 1).rsqrt().conj()
332 self: grad * 1 / (1 - self.pow(2)).conj()
339 self: as_strided_backward(grad, TensorGeometry(self), size, stride, storage_offset)
343 self: as_strided_backward(grad, TensorGeometry(self), size, stride, storage_offset)
347 self: grad * (-self * self + 1).rsqrt().conj()
351 self: grad / (self * self + 1).conj()
355 self, other: atan2_backward(grad, self, other, grad_input_mask)
359 self: maybe_multiply(grad, beta.conj())
360 batch1: maybe_multiply(grad.bmm(batch2.transpose(1, 2).conj()), alpha.conj())
361 batch2: maybe_multiply(batch1.transpose(1, 2).conj().bmm(grad), alpha.conj())
365 self: zeros_like(grad)
369 self: zeros_like(grad)
374 self: zeros_like(grad)
378 self: grad.bmm(mat2.transpose(1, 2).conj())
379 mat2: self.transpose(1, 2).conj().bmm(grad)
383 self, other: matmul_backward(grad, self, other, grad_input_mask)
386 …tensors: cat_tensors_backward(grad, to_args_sizes_symint(tensors), to_args_scalartypes(tensors), d…
390 self: zeros_like(grad)
394 self: zeros_like(grad)
398 self: cholesky_backward(grad, upper, result)
401 self: cholesky_backward(grad, upper, L)
405 self, input2: cholesky_solve_backward(grad, self, input2, result, upper, grad_input_mask)
409 self: cholesky_inverse_backward(grad, self, upper, result)
415 self: clamp_backward(grad, self, min, max)
416 min, max: clamp_backward_min_max(grad, self, min, max, grad_input_mask)
420 self: clamp_backward(grad, self, min, max)
424 self: where(self >= min, grad, at::scalar_tensor(0., grad.options()))
428 self: where(self >= min, grad, at::scalar_tensor(0., grad.options()))
429 min: where(self < min, grad, at::scalar_tensor(0., grad.options()))
433 self: where(self <= max, grad, at::scalar_tensor(0., grad.options()))
437 self: where(self <= max, grad, at::scalar_tensor(0., grad.options()))
438 max: where(self > max, grad, at::scalar_tensor(0., grad.options()))
442 self: grad
446 self: grad
450 self: _to_copy_backward(grad, self.options())
453 # (If dtype IS nullopt, we rely on the regular check that any input requires grad).
457 self: grad
460 real: at::real(grad)
461 imag: at::imag(grad)
465 abs, angle: polar_backward(grad, result)
469 self: grad.conj()
473 self: grad.neg()
477 self: grad.conj_physical()
481 self: grad.conj_physical()
485 self: copysign_tensor_self_backward(grad, self, result)
490 self: copysign_tensor_self_backward(grad, self, result)
494 self: grad * -self.sin().conj()
498 self: grad * self.sinh().conj()
508 self: at::linalg_cross(other.conj(), grad, dim)
509 other: at::linalg_cross(grad, self.conj(), dim)
513 self: logcumsumexp_backward(grad, self, result, dim)
517 self: cumprod_backward(grad.to(self.scalar_type()), self, dim, result)
521 self: cumsum_backward(grad.to(self.scalar_type()), dim)
525 self: cummaxmin_backward(grad, self, indices, dim)
529 self: cummaxmin_backward(grad, self, indices, dim)
533 …self, weight, bias: "grad.defined() ? conv_tbc_backward(grad, self, weight, bias, pad) : std::tupl…
536 …log_probs: _ctc_loss_backward(grad, log_probs, targets, input_lengths, target_lengths, result0, re…
539 …log_probs: _ctc_loss_backward(grad, log_probs, targets, input_lengths, target_lengths, result0, re…
542 self: deg2rad_backward(grad)
546 A: linalg_det_backward(grad, result, A, LU, pivots)
556 tensors: block_diag_backward(grad, to_args_sizes(tensors), to_args_scalartypes(tensors))
560 self: grad.diagonal(offset, dim1, dim2)
564 self: diagonal_backward_symint(grad, self.sym_sizes(), offset, dim1, dim2)
568 grad_output: grad.diagonal(offset, dim1, dim2)
572 self: norm_backward(grad, self - other, p, result)
573 other: -norm_backward(grad, self - other, p, result)
580 self: div_tensor_self_backward(grad, other, self.scalar_type())
581 other: div_tensor_other_backward(grad, self, other)
585 self: div_tensor_self_backward(grad, other, self.scalar_type())
589 self: div_tensor_self_backward(grad, other, self.scalar_type(), rounding_mode)
590 other: div_tensor_other_backward(grad, self, other, rounding_mode)
594 self: div_tensor_self_backward(grad, other, self.scalar_type(), rounding_mode)
598 self: grad * tensor.conj()
599 tensor: grad * self.conj()
603 self: grad.conj() * other
604 other: grad * self
608 self: _fused_dropout_backward(grad, result1, p)
611 …pout_backward(grad, result1, (!train.has_value() || !train.value() ? 1 : (p == 1 ? 0.0 : 1.0 / (1.…
615 grad_output: "native_dropout_double_backward(grad, grad_output, mask, scale)"
628 self: 2.0 / sqrt(M_PI) * exp(-(self.pow(2))) * grad
632 self: -2.0 / sqrt(M_PI) * exp(-(self.pow(2))) * grad
636 self: (2.0 * self * result - 2.0 / sqrt(M_PI)) * grad
640 self: 0.5 * sqrt(M_PI) * exp(self.erfinv().pow(2)) * grad
644 self: grad * result.conj()
648 self: grad * result.conj() * M_LN2
652 self: grad * (result.conj() + 1)
657 self: at::sum_to(grad, self.sym_sizes())
661 self: zeros_like(grad)
665 self: fake_quantize_per_tensor_affine_cachemask_backward(grad, mask)
668 self: fake_quantize_per_tensor_affine_cachemask_backward(grad, mask)
671 …self, scale, zero_point: "grad.defined() ? _fake_quantize_learnable_per_tensor_affine_backward(gra…
674 self: fake_quantize_per_channel_affine_cachemask_backward(grad, mask)
677 …self, scale, zero_point: "grad.defined() ? _fake_quantize_learnable_per_channel_affine_backward(gr…
680 self: fake_quantize_per_tensor_affine_cachemask_backward(grad, mask)
683 self: zeros_like(grad)
687 self: zeros_like(grad)
688 value: grad.sum()
692 self: zeros_like(grad)
696 self: zeros_like(grad)
697 value: grad.sum()
701 self: zeros_like(grad)
705 self: grad
709 self: grad
710 other: -grad * self.div(other, /*rounding_mode=*/"trunc")
714 self: grad
718 self: grad / exponent.exp2()
722 self: gather_backward(grad, self, dim, index, sparse_grad)
736 self: zeros_like(grad)
761 …input, grid: "grad.defined() ? grid_sampler_2d_backward(grad, input, grid, interpolation_mode, pad…
764 …input, grid: "grad.defined() ? grid_sampler_3d_backward(grad, input, grid, interpolation_mode, pad…
768 …input, grid: "grad.defined() ? _grid_sampler_2d_cpu_fallback_backward(grad, input, grid, interpola…
780 self: hardsigmoid_backward(grad, self)
787 self: hardswish_backward(grad, self)
791 grad_output: hardswish_backward(grad, self)
792 …self: at::where(at::logical_and(-3.0 < self, self < 3.0), grad * grad_output / 3.0, at::zeros({}, …
797 self: grad * self / result
798 other: grad * other / result
802 self: grad * at::special_i1(self)
806 self: grad * (at::special_i1e(self) - self.sgn() * result)
810 self: i1_backward(grad, self, result)
814 self: i1e_backward(grad, self, result)
819 other: grad * exp((self - 1) * log(other) - other - lgamma(self))
823 other: -grad * exp((self - 1) * log(other) - other - lgamma(self))
826 self: index_backward(grad.new_zeros_symint(self.sym_sizes(), self.options()), indices, grad)
830 …self: at::_unsafe_index_put(grad.new_zeros_symint(self.sym_sizes(), self.options()), indices, grad…
834 self: grad
838 …source: "maybe_multiply(source.dim() > 0 ? grad.index_select(dim, index).expand_as(source) : grad.…
843 self, source: index_reduce_backward(grad, self, dim, index, source, reduce, include_self, result)
847 self: grad.index_fill(dim, index, 0)
851 …source: "source.dim() > 0 ? grad.index_select(dim, index).expand_as(source) : grad.index_select(di…
856 self: grad.index_fill(dim, index, 0)
861 self: grad.index_fill(dim, index, 0)
862 value: grad.index_select(dim, std::get<0>(at::_unique(index, /*sorted=*/false))).sum()
867 self: "accumulate ? grad : grad.index_put(indices, zeros_like(values), false)"
868 values: grad.index(indices)
872 self: "accumulate ? grad : at::_unsafe_index_put(grad, indices, zeros_like(values), false)"
873 values: at::_unsafe_index(grad, indices)
877 self: "accumulate ? grad : grad.index_put(indices, zeros_like(values), false)"
878 values: grad.index(indices)
882 self: index_select_backward_symint(grad, self.sym_sizes(), dim, index)
887 A: -at::matmul(inverse.mH(), at::matmul(grad, inverse.mH()))
892 self: pinv_backward(grad, result, self)
899 self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), keepdim)
912 …self: "weight.isComplex() ? grad * (1 - weight.conj().toComplexDouble()) : grad * (1 - weight.toDo…
913 end: grad * weight.conj()
917 self: grad * (1 - weight).conj()
918 end: grad * weight.conj()
919 weight: grad * (end - self).conj()
923 self: grad * digamma(self)
927 self: grad * polygamma(1, self)
931 self: grad * polygamma(n + 1, self)
935 self: grad * polygamma(n + 1, self)
939 self: grad.div(self.conj())
943 self: grad / (self.conj() * 2.3025850929940456)
947 self: log1p_backward(grad, self)
951 self: grad / (self.conj() * 0.6931471805599453)
955 self: grad / (1 + exp(other - self)).conj()
956 other: grad / (1 + exp(self - other)).conj()
960 self: grad / (1 + pow(2, other - self))
961 other: grad / (1 + pow(2, self - other))
973 self: at::xlogy(grad, other).masked_fill((self == 0.) & (other <= 0.), 0.)
974 other: grad * self / other
978 other: grad * self / other
983 ? at::xlogy(grad, other)
984 : at::xlogy(grad, other).masked_fill(self == 0., 0.)"
990 self: at::special_xlog1py(grad, other).masked_fill((self == 0.) & (other <= -1.), 0.)
991 other: grad * self / (other + 1)
995 other: grad * self / (other + 1)
1000 ? at::special_xlog1py(grad, other)
1001 : at::special_xlog1py(grad, other).masked_fill(self == 0., 0.)"
1006 other: grad * -self * special_zeta(self + 1., other)
1009 other: grad * -self * special_zeta(self.toDouble() + 1., other)
1015 self: zeros_like(grad)
1019 self: logsumexp_backward(grad, self, result, dim, keepdim)
1023 self, b: linalg_lstsq_backward(grad, self, b, grad_input_mask)
1037 A: lu_factor_ex_backward(grad, LU, pivots, pivot)
1048 LU: linalg_lu_solve_LU(grad, LU, pivots, result, left, adjoint)
1049 B: "at::linalg_lu_solve(LU, pivots, grad, left, !adjoint)"
1060 self: grad.masked_fill(mask, 0)
1065 self: grad.masked_fill(mask, 0)
1066 value: masked_fill_backward(grad, mask)
1071 self: grad.masked_fill(mask, 0)
1072 source: masked_scatter_backward_symint(grad, mask, source.sym_sizes())
1077 grad_output: zeros_like(grad_output).masked_scatter(mask, grad)
1082 self: masked_select_backward(grad, self, mask)
1087 self: linalg_matrix_exp_differential(self, grad, /*adjoint*/ true)
1091 self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), keepdim)
1095 self: evenly_distribute_backward(grad, self, result)
1099 self: at::where(self == other, grad / 2, grad).masked_fill_(self < other, 0)
1100 other: at::where(self == other, grad / 2, grad).masked_fill_(self > other, 0)
1104 self: grad.masked_fill((self >= other).logical_or_(other.isnan()).logical_not_(), 0)
1105 other: grad.masked_fill((self >= other).logical_or_(other.isnan()), 0)
1109 self: grad.expand_symint(self.sym_sizes()) / self.sym_numel()
1113 self: mean_backward(grad, self.sym_sizes(), dim, self.sym_numel(), keepdim)
1117 self: evenly_distribute_backward(grad, self, result)
1121 self: evenly_distribute_backward(grad, self, result)
1139 self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), keepdim)
1143 self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), keepdim)
1147 self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), keepdim)
1151 self: evenly_distribute_backward(grad, self, result)
1155 self: at::where(self == other, grad / 2, grad).masked_fill_(self > other, 0)
1156 other: at::where(self == other, grad / 2, grad).masked_fill_(self < other, 0)
1160 self: grad.masked_fill((self <= other).logical_or_(other.isnan()).logical_not_(), 0)
1161 other: grad.masked_fill((self <= other).logical_or_(other.isnan()), 0)
1165 …self: scale_grad_by_count(restore_reduced_dims(grad, dim, keepdim), restore_reduced_dims(result, d…
1169 …self: scale_grad_by_count(restore_reduced_dims(grad, dim, keepdim), restore_reduced_dims(result, d…
1173 self: mm_mat1_backward(grad, mat2, self.sym_sizes(), self.sym_strides(), self.layout(), 1)
1174 mat2: mm_mat2_backward(grad, self, mat2.sym_sizes(), mat2.sym_strides(), mat2.layout(), 1)
1178 self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), keepdim)
1182 self: mul_tensor_backward(grad, other, self.scalar_type())
1183 other: mul_tensor_backward(grad, self, other.scalar_type())
1187 self: mul_tensor_backward(grad, other, self.scalar_type())
1191 self: grad.ger(vec.conj())
1192 vec: self.conj().t().mv(grad)
1196 self: mvlgamma_backward(grad, self, p)
1200 self: grad * at::isfinite(self)
1204 …input, weight, bias: "grad.defined() ? native_batch_norm_backward(grad, input, weight, running_mea…
1208 …input, weight, bias: "grad.defined() ? native_batch_norm_backward(grad, input, weight, running_mea…
1212 …input, weight, bias: "grad.defined() ? native_batch_norm_backward(grad, input, weight, running_mea…
1216 …input, weight, bias: "grad.defined() ? native_batch_norm_backward(grad, input, weight, Tensor(), T…
1225 …input, weight, bias: "grad.defined() ? native_layer_norm_backward_symint(grad, input, normalized_s…
1250 self: grad.neg()
1254 …input, weight, bias: "grad.defined() ? batch_norm_backward(grad, input, weight, running_mean, runn…
1258 …input, weight, bias: "grad.defined() ? batch_norm_backward(grad, input, weight, running_mean, runn…
1272 self: norm_backward(grad, self, p, result)
1276 self: norm_backward(grad, self, p, result, dim, keepdim)
1280 self: norm_backward(grad, self.to(grad.scalar_type()), p, result)
1284 self: norm_backward(grad, self.to(grad.scalar_type()), p, result, dim, keepdim)
1288 self: linalg_vector_norm_backward(grad, self, ord, result, dim, keepdim)
1292 self: _pdist_backward(grad, self, p, result)
1294 - name: _pdist_backward(Tensor grad, Tensor self, float p, Tensor pdist) -> Tensor
1295 grad: not_implemented("_pdist_backward")
1300 x1, x2: _euclidean_dist_backward(grad, x1, x2, result)
1303 x1: _cdist_backward(grad.contiguous(), x1, x2, p, result)
1304 x2: _cdist_backward(grad.mT().contiguous(), x2, x1, p, result.mT().contiguous())
1306 - name: _cdist_backward(Tensor grad, Tensor x1, Tensor x2, float p, Tensor cdist) -> Tensor
1307 grad: not_implemented("_cdist_backward")
1313 self: zeros_like(grad)
1317 mean: at::zeros_symint(mean.sym_sizes(), grad.options())
1321 std: at::zeros_symint(std.sym_sizes(), grad.options())
1325 mean: at::zeros_symint(mean.sym_sizes(), grad.options())
1326 std: at::zeros_symint(std.sym_sizes(), grad.options())
1330 input, tau: householder_product_backward(grad, result, input, tau)
1334 …self, input2, input3: ormqr_backward(grad, result, self, input2, input3, left, transpose, grad_inp…
1337 self: permute_backwards(grad, dims)
1345 self: pow_backward(grad, self, exponent)
1349 self: pow_backward_self(grad, self, exponent)
1350 exponent: pow_backward_exponent(grad, self, exponent, result)
1354 exponent: pow_backward_exponent(grad, self, exponent, result)
1358 self: prod_backward(grad, self.to(grad.scalar_type()), result)
1362 self: prod_backward(grad, self.to(grad.scalar_type()), result, dim, keepdim)
1366 self: "accumulate ? grad : grad.put(index, zeros_like(source), false)"
1368 source: grad.take(index).reshape_as(source)
1376 self: rad2deg_backward(grad)
1380 self: zeros_like(grad)
1384 self: zeros_like(grad)
1388 self: zeros_like(grad)
1392 self: -grad * (result * result).conj()
1396 self: grad
1400 self: grad
1401 other: -grad * self.div(other, /*rounding_mode=*/"floor")
1405 self: renorm_backward(grad, self, p, dim, maxnorm)
1409 self: repeat_backward(grad, repeats, self.sym_sizes())
1413 self: grad * (-(1 + self.log()))
1417 self: grad * std::sqrt(2 * M_PI) * (result.square() / 2).exp()
1421 self: grad / std::sqrt(2 * M_PI) * (result + self.pow(2) / 2).neg().exp()
1437 self: grad.reshape_symint(self.sym_sizes())
1441 self: zeros_like(grad)
1445 self: zeros_like(grad)
1449 self: -0.5 * grad * result.pow(3).conj()
1453 self: grad.scatter(dim, index, 0)
1455 src: grad.gather(dim, index)
1459 self: grad.scatter(dim, index, 0)
1464 self: grad
1466 src: grad.gather(dim, index)
1472 self: select_backward_symint(grad, self.sym_sizes(), dim, index)
1475 self: _nested_select_backward_symint(grad, self, dim, index)
1478 grad_output: grad.select_symint(dim, index)
1482 self: sigmoid_backward(grad, result)
1486 …:is_enabled() ? infinitely_differentiable_logit_backward(grad, self, eps) : logit_backward(grad, s…
1490 self: zeros_like(grad)
1494 self: sgn_backward(self, grad, result)
1501 self: grad * self.cos().conj()
1505 self: sinc_backward(grad, self)
1509 self: grad * self.cosh().conj()
1513 self: slice_backward_wrapper(grad, self.sym_sizes(), dim, start, end, step)
1517 grad_output: grad.slice_symint(dim, start, end, step)
1521 self: grad.slice_symint(dim, start, end, step)
1522 src: slice_scatter_symint(grad, zeros_like(self), dim, start, end, step)
1526 self: slice_scatter_symint(grad, zeros_like(src), dim, start, end, step)
1527 src: grad.slice_symint(dim, start, end, step)
1531 self: select_scatter_symint(grad, zeros_like(src), dim, index)
1532 src: grad.select_symint(dim, index)
1536 self: diagonal_scatter(grad, zeros_like(src), offset, dim1, dim2)
1537 src: grad.diagonal(offset, dim1, dim2)
1541 …self: as_strided_scatter_backward(grad, TensorGeometry(self), TensorGeometry(src), size, stride, s…
1543 src: grad.contiguous().as_strided_symint(size, stride, storage_offset)
1547 A, B: linalg_solve_backward(grad, result, A, LU, pivots, left, grad_input_mask[1])
1552 self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), true)
1557 self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), true)
1582 self: grad / (2 * result.conj())
1586 self: unsqueeze_to(grad, self.sym_sizes())
1592 self: unsqueeze_to(grad, dim, self.sym_sizes())
1595 self: grad.unsqueeze(dim)
1600 self: unsqueeze_to(grad, dim, self.sym_sizes())
1603 self: unsqueeze_multiple(grad, dim, self.dim())
1606 self: unsqueeze_to(grad, self.sym_sizes())
1610 self: unsqueeze_to(grad, dim, self.sym_sizes())
1614 self: unsqueeze_to(grad, dim, self.sym_sizes())
1618 self: std_backward(result, grad, self, dim, correction, keepdim)
1629 self: handle_r_to_c(self.scalar_type(), grad)
1630 other: handle_r_to_c(other.scalar_type(), maybe_multiply(-grad, alpha.conj()))
1634 self: handle_r_to_c(self.scalar_type(), grad)
1638 self: handle_r_to_c(self.scalar_type(), maybe_multiply(-grad, alpha.conj()))
1639 other: handle_r_to_c(other.scalar_type(), grad)
1643 self: handle_r_to_c(self.scalar_type(), maybe_multiply(-grad, alpha.conj()))
1647 self: grad.expand_symint(self.sym_sizes())
1653 self: sum_backward(grad, self.sym_sizes(), dim, keepdim)
1657 self: _nested_sum_backward(grad, self, dim, keepdim)
1660 self: nansum_backward(grad.to(self.scalar_type()), self, dim, keepdim)
1682 self: grad.t()
1686 self: grad.t()
1693 self: grad.flip(dims)
1697 …self: grad.roll_symint(fmap(reverse_list_symint(shifts), [](c10::SymInt i){return -i;}), reverse_l…
1701 self: grad.rot90(-k, dims)
1705 self: take_backward(grad, self, index)
1710 self: grad * (1 + result.pow(2)).conj()
1714 self: tanh_backward(grad, result)
1718 self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), true)
1723 self: trace_backward_symint(grad, self.sym_sizes())
1727 self: grad.transpose(dim0, dim1)
1731 self: grad.transpose(dim0, dim1)
1740 …self, B: linalg_solve_triangular_backward(grad, self, result, upper, left, unitriangular, grad_inp…
1744 self: grad.tril(diagonal)
1748 self: grad.triu(diagonal)
1752 self: zeros_like(grad)
1760 self: to_dense_backward(grad, self, masked_grad)
1767 self: to_sparse_backward(grad, self.layout(), self.sym_blocksize())
1774 self: to_sparse_backward(grad, self.layout(), self.sym_blocksize())
1781 self: to_sparse_backward(grad, self.layout(), self.sym_blocksize())
1788 self: to_sparse_backward(grad, self.layout(), self.sym_blocksize())
1795 self: to_sparse_backward(grad, self.layout(), self.sym_blocksize())
1802 self: to_sparse_backward(grad, self.layout(), self.sym_blocksize())
1805 self: to_mkldnn_backward(grad, self)
1808 self: unfold_backward_symint(grad, self.sym_sizes(), dimension, size, step)
1812 grad_in: grad.unfold(dim, size, step)
1816 self: zeros_like(grad)
1840 self: grad.reshape_symint(self.sym_sizes())
1844 self: grad
1848 self: grad
1852 self: grad.squeeze(dim)
1856 self: grad.squeeze(dim)
1860 self: var_backward(grad, self, dim, correction, keepdim)
1873 self: grad.reshape_symint(self.sym_sizes())
1876 self: grad.reshape_as(self)
1883 self: at::view_as_complex(grad.contiguous()) # gx0 + 1j * gx1
1887 self: at::view_as_real(grad.contiguous().resolve_conj()) # [gx, gy]
1892 self: where(condition, grad, 0)
1893 other: where(condition, 0, grad)
1900 …grad.defined() ? (GradMode::is_enabled() ? _weight_norm_differentiable_backward(grad.contiguous(),…
1903 self: zeros_like(grad)
1907 self: sparse_mask_backward(grad, mask, self.layout())
1912 values: grad.sparse_mask(result)._values()
1918 values: grad.to_dense().sparse_mask(result).values()
1921 self: at::_sparse_sum_backward(grad, self, dim)
1924 self: grad * _standard_gamma_grad(self, result)
1932 self: values_backward(grad, self)
1934 …self: at::_nested_view_from_buffer(grad.contiguous(), self._nested_tensor_size(), self._nested_ten…
1943 i1, i2, i3: "_trilinear_backward(grad,
1953 self: constant_pad_nd_backward(grad, pad)
1957 self: binary_cross_entropy_backward(grad, self, target, weight, reduction)
1958 target: binary_cross_entropy_target_backward(grad, self, target, weight, reduction)
1965 self: binary_cross_entropy_double_backward(grad_output, grad, self, target, weight, reduction)
1966 …target: binary_cross_entropy_double_backward_target(grad, grad_output, self, target, weight, reduc…
1967 …grad_output: binary_cross_entropy_double_backward_grad_output(grad, self, target, weight, reductio…
1973 self: binary_cross_entropy_with_logits_backward(grad, self, target, weight, pos_weight, reduction)
1974 …target: binary_cross_entropy_with_logits_target_backward(grad, self, target, weight, pos_weight, r…
1982 …weight: embedding_backward_symint(grad, indices, weight.sym_size(0), padding_idx, scale_grad_by_fr…
1986 grad_output: embedding_dense_double_backward_symint(grad, indices, padding_idx)
1993 …weight: _embedding_bag_backward_symint(grad, indices, offsets, result1, result2, result3, weight.s…
1994 …per_sample_weights: _embedding_bag_per_sample_weights_backward(grad, weight, indices, offsets, res…
1996 - name: _embedding_bag_dense_backward(Tensor grad, Tensor indices, Tensor offset2bag, Tensor bag_si…
2007 self: mse_loss_backward(grad, self, target, reduction)
2008 target: mse_loss_backward(grad, target, self, reduction)
2012 self: multi_margin_loss_backward(grad, self, target, p, margin, weight, reduction)
2016 self: multilabel_margin_loss_backward(grad, self, target, reduction, is_target)
2020 self: nll_loss_backward_symint(grad, self, target, weight, reduction, ignore_index, total_weight)
2025 …self: nll_loss2d_backward_symint(grad, self, target, weight, reduction, ignore_index, total_weight)
2030 self: smooth_l1_loss_backward(grad, self, target, reduction, beta)
2031 target: smooth_l1_loss_backward(grad, target, self, reduction, beta)
2035 self: huber_loss_backward(grad, self, target, reduction, delta)
2036 target: huber_loss_backward(grad, target, self, reduction, delta)
2040 self: soft_margin_loss_backward(grad, self, target, reduction)
2044 self: threshold_backward(grad, result, 0)
2048 …ode::is_enabled() ? infinitely_differentiable_silu_backward(grad, self) : silu_backward(grad, self…
2052 …ode::is_enabled() ? infinitely_differentiable_mish_backward(grad, self) : mish_backward(grad, self…
2056 self: elu_backward(grad, alpha, scale, input_scale, /* is_result */ false, self)
2060 self: elu_backward(grad, alpha, scale, input_scale, /* is_result */ true, result)
2064 self: elu_backward(grad, alpha, 1, 1.0/alpha.toFloat(), /* is_result */ false, self)
2068 self: elu_backward(grad, alpha, 1, 1.0/alpha.toFloat(), /* is_result */ true, result)
2072 self: gelu_backward(grad, self, approximate)
2076 grad_output: gelu_backward(grad, self, approximate)
2077 self: gelu_double_backward(grad, grad_output, self, approximate)
2083 self: glu_backward(grad, self, dim)
2087 self: hardshrink_backward(grad, self, lambd)
2091 grad_out: hardshrink_backward(grad, self, lambd)
2092 self: zeros_like(grad)
2096 self: hardtanh_backward(grad, self, min_val, max_val)
2100 self: leaky_relu_backward(grad, self, negative_slope, false)
2104 self: leaky_relu_backward(grad, result, negative_slope, true)
2108 self: log_sigmoid_backward(grad, self, buffer)
2113 self: _log_softmax_backward_data(grad, result, dim, self.scalar_type())
2117 self: _sparse_log_softmax_backward_data(grad, result, dim, self)
2120 self: _masked_softmax_backward(grad, result, mask, dim)
2124 …self, weight: "grad.defined() ? _prelu_kernel_backward(grad, self, weight) : std::tuple<Tensor, Te…
2138 self: rrelu_with_noise_backward(grad, self, noise, lower, upper, training, false)
2142 self: rrelu_with_noise_backward(grad, result, noise, lower, upper, training, true)
2145 self: _softmax_backward_data(grad, result, dim, self.scalar_type())
2149 self: _sparse_softmax_backward_data(grad, result, dim, self)
2152 self: sparse_sparse_matmul_backward(grad, self, other, 0)
2153 other: sparse_sparse_matmul_backward(grad, self, other, 1)
2156 self: softplus_backward(grad, self, beta, threshold)
2160 self: softshrink_backward(grad, self, lambd)
2164 self: threshold_backward(grad, self, threshold)
2168 self: threshold_backward(grad, self, threshold)
2172 self: reflection_pad1d_backward_symint(grad, self, padding)
2176 self: reflection_pad2d_backward_symint(grad, self, padding)
2180 self: reflection_pad3d_backward_symint(grad, self, padding)
2184 self: replication_pad1d_backward_symint(grad, self, padding)
2188 self: replication_pad2d_backward_symint(grad, self, padding)
2192 self: replication_pad3d_backward_symint(grad, self, padding)
2196 …self: upsample_linear1d_backward_symint(grad, output_size, self.sym_sizes(), align_corners, scales)
2200 …self: upsample_bilinear2d_backward_symint(grad, output_size, self.sym_sizes(), align_corners, scal…
2204 …self: _upsample_bilinear2d_aa_backward_symint(grad, output_size, self.sym_sizes(), align_corners, …
2208 …self: upsample_bicubic2d_backward_symint(grad, output_size, self.sym_sizes(), align_corners, scale…
2212 …self: _upsample_bicubic2d_aa_backward_symint(grad, output_size, self.sym_sizes(), align_corners, s…
2216 …self: upsample_trilinear3d_backward_symint(grad, output_size, self.sym_sizes(), align_corners, sca…
2220 self: upsample_nearest1d_backward_symint(grad, output_size, self.sym_sizes(), scales)
2224 self: _upsample_nearest_exact1d_backward_symint(grad, output_size, self.sym_sizes(), scales)
2228 self: upsample_nearest2d_backward_symint(grad, output_size, self.sym_sizes(), scales_h, scales_w)
2232 …self: _upsample_nearest_exact2d_backward_symint(grad, output_size, self.sym_sizes(), scales_h, sca…
2236 …self: upsample_nearest3d_backward_symint(grad, output_size, self.sym_sizes(), scales_d, scales_h, …
2240 …self: _upsample_nearest_exact3d_backward_symint(grad, output_size, self.sym_sizes(), scales_d, sca…
2244 self: pixel_unshuffle(grad, upscale_factor)
2248 self: pixel_shuffle(grad, downscale_factor)
2252 self: _adaptive_avg_pool2d_backward(grad, self)
2256 self: _adaptive_avg_pool3d_backward(grad, self)
2260 self: adaptive_max_pool2d_backward(grad, self, result1)
2265 self: adaptive_max_pool3d_backward(grad, self, result1)
2270 …self: avg_pool2d_backward(grad, self, kernel_size, stride, padding, ceil_mode, count_include_pad, …
2274 …self: avg_pool3d_backward(grad, self, kernel_size, stride, padding, ceil_mode, count_include_pad, …
2278 self: fractional_max_pool2d_backward(grad, self, kernel_size, output_size, result1)
2283 self: fractional_max_pool3d_backward(grad, self, kernel_size, output_size, result1)
2288 …input, weight, bias: "grad.defined() ? linear_backward(input, grad, weight, grad_input_mask) : std…
2295 self: max_pool2d_backward(grad, self, kernel_size, stride, padding, dilation, ceil_mode)
2298 …self, weight, bias: "grad.defined() ? mps_convolution_backward_symint(self, grad, weight, padding,…
2304 …self: max_pool2d_with_indices_backward(grad, self, kernel_size, stride, padding, dilation, ceil_mo…
2309 …self: max_pool3d_with_indices_backward(grad, self, kernel_size, stride, padding, dilation, ceil_mo…
2314 self: max_pool_double_backward(grad, indices, 2)
2319 self: max_pool_double_backward(grad, indices, 3)
2324 …input, weight, bias: "grad.defined() ? convolution_backward_symint(grad, input, weight, bias->sym_…
2331 …input, weight, bias: "grad.defined() ? convolution_backward_symint(grad, input, weight, bias->sym_…
2341 …input, weight, bias: "grad.defined() ? convolution_backward_overrideable_symint(grad, input, weigh…
2347 …self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_si…
2350 …self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_si…
2353 …self, weight, bias: "grad.defined() ? _slow_conv2d_backward_symint(grad, self, weight, kernel_size…
2359 …self, weight, bias: "grad.defined() ? convolution_backward_symint(grad.contiguous(), self, weight,…
2362 …self, weight, bias: "grad.defined() ? convolution_backward_symint(grad.contiguous(), self, weight,…
2365 …self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_si…
2368 …self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_si…
2371 …self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_si…
2374 self: im2col(grad, kernel_size, dilation, padding, stride)
2378 …self: col2im_symint(grad, {self.sym_size(-2), self.sym_size(-1)}, kernel_size, dilation, padding, …
2382 …grad_output: _adaptive_avg_pool2d_symint(grad, {grad_output.sym_size(-2), grad_output.sym_size(-1)…
2387 …grad_output: _adaptive_avg_pool3d_symint(grad, { grad_output.sym_size(-3), grad_output.sym_size(-2…
2392 grad_output: max_pool_double_backward(grad, indices, 2)
2397 grad_output: max_pool_double_backward(grad, indices, 3)
2402 …grad_output: avg_pool2d(grad, kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_…
2407 …grad_output: avg_pool3d(grad, kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_…
2412 grad_output: elu_backward(grad, alpha, scale, input_scale, is_result, self_or_result)
2413 …self_or_result: elu_double_backward(grad, grad_output, alpha, scale, input_scale, is_result, self_…
2417 grad_output: max_pool_double_backward(grad, indices, 2)
2422 grad_output: max_pool_double_backward(grad, indices, 3)
2427 grad_output: glu_double_backward_grad_output(grad, self, dim)
2428 self: glu_double_backward(grad, grad_output, self, dim)
2432 grad_output: hardtanh_backward(grad, self, min_val, max_val)
2433 self: zeros_like(grad)
2437 grad_output: log_sigmoid_backward(grad, self, buffer)
2438 self: log_sigmoid_double_backward(grad * grad_output, self)
2442 grad_output: grad.to(output.dtype()) - (grad.to(output.dtype()) * output.exp()).sum(dim, true)
2443 output: (-grad_output.sum(dim, true) * output.exp() * grad.to(output.dtype())).to(output.dtype())
2447 grad_output: leaky_relu_backward(grad, self, negative_slope, false)
2448 self: zeros_like(grad)
2475 grad_output: max_pool_double_backward(grad, indices, 2)
2481 grad_output: max_pool_double_backward(grad, indices, 3)
2487 grad_output: mse_loss_backward(grad, self, target, reduction)
2488 self: mse_loss_double_backward(grad * grad_output, self, reduction)
2489 target: -mse_loss_double_backward(grad * grad_output, target, reduction)
2496 grad_output: nll_loss_symint(grad, target, weight, reduction, ignore_index)
2497 self: zeros_like(grad)
2501 grad_output: nll_loss2d_symint(grad, target, weight, reduction, ignore_index)
2502 self: zeros_like(grad)
2507 grad_output: rrelu_with_noise_backward(grad, self, noise, lower, upper, training, false)
2508 self: zeros_like(grad)
2512 grad_output: reflection_pad1d_symint(grad, padding)
2517 grad_output: reflection_pad2d_symint(grad, padding)
2522 grad_output: reflection_pad3d_symint(grad, padding)
2527 grad_output: replication_pad1d_symint(grad, padding)
2532 grad_output: replication_pad2d_symint(grad, padding)
2537 grad_output: replication_pad3d_symint(grad, padding)
2542 self, mat1, mat2: "sparse_sampled_addmm_backward(grad,
2550 …self, other: "grad.defined() ? _sparse_mm_reduce_impl_backward(self, grad, other, reduce, result1,…
2553 grad_output: smooth_l1_loss_backward(grad, self, target, reduction, beta)
2554 self: smooth_l1_loss_double_backward(grad * grad_output, self, target, reduction, beta)
2555 target: -smooth_l1_loss_double_backward(grad * grad_output, self, target, reduction, beta)
2562 …grad_output: huber_loss_double_backward_grad_output(grad, grad_output, self, target, reduction, de…
2563 self: huber_loss_double_backward(grad * grad_output, self, target, reduction, delta)
2564 target: -huber_loss_double_backward(grad * grad_output, self, target, reduction, delta)
2567 grad_output: softplus_backward(grad, self, beta, threshold)
2568 self: softplus_double_backward(grad * grad_output, self, beta, threshold)
2573 grad_output: _softmax_backward_data(grad.to(output.dtype()), output, dim, input_dtype)
2574 …output: softmax_double_backward(grad.to(output.dtype()), grad_output, dim, output).to(output.dtype…
2577 …grad_output: soft_margin_loss_double_backward_grad_output(grad, grad_output, self, target, reducti…
2578 self: soft_margin_loss_double_backward(grad * grad_output, self, target, reduction)
2581 grad_output: softshrink_backward(grad, self, lambd)
2582 self: zeros_like(grad)
2586 grad_output: threshold_backward(grad, self, threshold)
2587 self: zeros_like(grad)
2591 grad_output: upsample_linear1d_symint(grad, output_size, align_corners, scales)
2595 grad_output: upsample_bilinear2d_symint(grad, output_size, align_corners, scales_h, scales_w)
2599 grad_output: _upsample_bilinear2d_aa_symint(grad, output_size, align_corners, scales_h, scales_w)
2603 grad_output: upsample_bicubic2d_symint(grad, output_size, align_corners, scales_h, scales_w)
2607 grad_output: _upsample_bicubic2d_aa_symint(grad, output_size, align_corners, scales_h, scales_w)
2611 …grad_output: upsample_trilinear3d_symint(grad, output_size, align_corners, scales_d, scales_h, sca…
2615 grad_output: upsample_nearest1d_symint(grad, output_size, scales)
2619 grad_output: _upsample_nearest_exact1d_symint(grad, output_size, scales)
2623 grad_output: upsample_nearest2d_symint(grad, output_size, scales_h, scales_w)
2627 grad_output: _upsample_nearest_exact2d_symint(grad, output_size, scales_h, scales_w)
2631 grad_output: upsample_nearest3d_symint(grad, output_size, scales_d, scales_h, scales_w)
2635 grad_output: _upsample_nearest_exact3d_symint(grad, output_size, scales_d, scales_h, scales_w)
2639 grad_output: sigmoid_backward(grad, output.conj())
2640 output: grad.conj() * grad_output * (-2 * output.conj() + 1)
2644 grad_output: tanh_backward(grad, output.conj())
2645 output: grad.conj() * (-2 * output.conj() * grad_output)
2650 log_probs: _cudnn_ctc_loss_backward(grad, result0, result1, zero_infinity)
2653 log_probs: _cudnn_ctc_loss_backward(grad, result0, result1, zero_infinity)
2656 …self, weight: "_cudnn_convolution_backward(self, grad, weight, padding, output_padding, stride, di…
2659 …self, weight: "grad.defined() ? mps_convolution_transpose_backward_symint(self, grad, weight, padd…
2662 …self, weight: "_cudnn_convolution_backward(self, grad, weight, padding, std::vector<c10::SymInt>(p…
2665 …self, grid: "grad.defined() ? cudnn_grid_sampler_backward(self, grid, grad) : std::tuple<Tensor, T…
2668 theta: cudnn_affine_grid_generator_backward(grad, N, C, H, W)
2678 …grad.defined() ? (training ? cudnn_batch_norm_backward(input, grad.contiguous(input.suggest_memory…
2694 …input, weight, bias: "grad.defined() ? convolution_backward_symint(grad, input, weight, bias->sym_…
2726 …self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_si…
2729 …self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_si…
2732 …self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_si…
2735 …grad.defined() ? (training ? miopen_batch_norm_backward(input, grad.contiguous(), weight, running_…
2759 …self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_si…
2762 self, weight, bias: mkldnn_linear_backward(self, grad, weight, grad_input_mask)
2765 …self: mkldnn_max_pool2d_backward(grad, result, self, kernel_size, stride, padding, dilation, ceil_…
2768 …self: mkldnn_max_pool3d_backward(grad, result, self, kernel_size, stride, padding, dilation, ceil_…
2771 self: mkldnn_adaptive_avg_pool2d_backward(grad, self)
2774 self: grad.reshape_symint(self.sym_sizes())
2778 list: "grad.defined()? at::unbind(grad) : std::vector<Tensor>(list.size())"
2781 t: grad.to_padded_tensor_symint(0, t.sym_sizes())
2785 padded: _nested_from_padded_backward(grad, padded, fuse_transform_0213)
2789 self: at::_nested_from_padded(grad, self._nested_tensor_size())
2793 self: grad.values()
2798 self: grad.values()
2804 …self: _nested_view_from_jagged(grad, at::_nested_get_offsets(self), at::_nested_get_jagged_dummy(s…
2809 …query, key, value, attn_bias: _scaled_dot_product_efficient_attention_backward(grad, query, key, v…
2813 …query, key, value: _scaled_dot_product_flash_attention_backward_symint(grad, query, key, value, ou…
2817 …query, key, value: _scaled_dot_product_flash_attention_for_cpu_backward(grad, query, key, value, o…
2821 …query, key, value: _flash_attention_backward_symint(grad, query, key, value, output, softmax_logsu…
2825 …query, key, value, bias: _efficient_attention_backward_symint(grad, query, key, value, bias, outpu…
2829 …query, key, value: _scaled_dot_product_cudnn_attention_backward_symint(grad, query, key, value, ou…
2833 self: fft_r2c_backward(grad, dim, normalization, onesided, self.sym_size(dim.back()))
2837 self: fft_c2r_backward(grad, dim, normalization)
2841 self: _fft_c2c_symint(grad, dim, normalization, !forward)
2854 tensors: stack_tensors_backward(grad, dim, to_args_scalartypes(tensors))
2866 …grad.defined() ? (GradMode::is_enabled() ? _thnn_differentiable_gru_cell_backward(grad, input_gate…
2870 input: _pack_padded_sequence_backward_symint(grad, input.sym_sizes(), result1, batch_first)
2916 data: _segment_reduce_backward(grad, result, data, reduce, lengths, offsets, axis, initial)
2919 self: grad
2927 self: warn_backwards(grad)
2932 self: grad.expand_symint(self.sym_sizes()) + 1
2935 self: grad.mul(grad)
2937 self: grad.expand_symint(self.sym_sizes()) * 2
2942 self: grad.mul(grad).add(grad)
2947 self: grad.reshape_as(self)
2949 self: grad.reshape_as(self) + 1
2955 self, src: scatter_reduce_backward(grad, self, dim, index, src, reduce, include_self, result)
3116 self: grad.reshape_symint(self.sym_sizes())