Lines matching whole-word `grad` in tools/autograd/derivatives.yaml
6 # and `grad == grads[0]`, in all the derivative formulas in this file.
35 # uses `grad` instead of `grads[idx]`, then all but the first output will
63 # - 'grad', the gradient of the output (often spelled grad_output
77 # 'grad' (this case is special-cased in our code generation).
83 # permitted to mix the use of "grad", "grads", and
99 # grad, and `grad_input_mask[1]` is true if `input1` requires grad.
104 # grad. If we want to support more fine-grained signalling,
140 # called 'grad' (even though it really is a grad-grad).
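# A minimal sketch of the naming rules above, using a hypothetical
# two-output op "my_op" (not an actual entry in this file): `grads[i]`
# selects the gradient of the i-th output, a single-output formula may
# simply write `grad`, and a combined formula covering several inputs
# receives a `grad_input_mask`:
#
#   - name: my_op(Tensor self, Tensor other) -> (Tensor, Tensor)
#     self: grads[0] * other.conj()
#     other: grads[1] * self.conj()
#
# Combined-formula pattern (as the atan2 entry below uses):
#   self, other: atan2_backward(grad, self, other, grad_input_mask)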
147 # value of the argument "foo_p", its forward grad "foo_t" and the result of the
156 # will always have their forward grad formula called. This function is responsible
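# A sketch of a forward-grad formula in the "foo_p"/"foo_t" convention
# above (primal value and forward tangent of argument "foo"). For a
# product y = self * other, the product rule gives:
#
#   result: other_t * self_p + self_t * other_p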
178 # gradient Tensors, where `grads[i].defined() == false`. Depending on the
180 # generation may automatically add some level of undefined grad support,
183 # * 1 input grad and 1 output grad:
184 # Complete undefined grad support is automatically added, so you
188 # * 1 input grad and multiple output grads:
189 # Undefined grad support is automatically added ONLY in the case where
196 # If your derivative formula uses more than one output grad, it is usually
197 # preferable to add undefined grad support in the backward function itself
204 # Therefore, your backward function should treat an undefined output grad as
215 # an undefined Tensor is correct for a given input grad, it is also logically
216 # correct to return a defined grad full of zeros, but that would not be
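# When undefined grad support has to live in the formula itself, entries
# guard on `grad.defined()` and fall back to undefined outputs, as the
# conv_tbc and grid_sampler entries below do:
#
#   self, weight, bias: "grad.defined()
#       ? conv_tbc_backward(grad, self, weight, bias, pad)
#       : std::tuple<Tensor, Tensor, Tensor>()"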
222 self: grad * self.sgn()
226 self: grad * -((-self * self + 1).rsqrt()).conj()
230 self: handle_r_to_c(self.scalar_type(), grad)
231 other: handle_r_to_c(other.scalar_type(), maybe_multiply(grad, alpha.conj()))
235 self: handle_r_to_c(self.scalar_type(), grad)
239 self: maybe_multiply(grad, beta.conj())
240 …batch1: maybe_multiply(grad.unsqueeze(0).expand_symint({ batch1.sym_size(0), batch1.sym_size(1), b…
241 …batch2: maybe_multiply(batch1.transpose(1, 2).conj().bmm(grad.unsqueeze(0).expand_symint({ batch1.…
245 self: handle_r_to_c(self.scalar_type(), grad)
246 tensor1: handle_r_to_c(tensor1.scalar_type(), grad * (value / tensor2).conj())
247 …tensor2: handle_r_to_c(tensor2.scalar_type(), -grad * (value * tensor1 / (tensor2 * tensor2)).conj…
251 self: handle_r_to_c(self.scalar_type(), grad)
252 tensor1: handle_r_to_c(tensor1.scalar_type(), grad * (tensor2 * value).conj())
253 tensor2: handle_r_to_c(tensor2.scalar_type(), grad * (tensor1 * value).conj())
257 self: maybe_multiply(grad, beta.conj())
258 mat1: mm_mat1_backward(grad, mat2, mat1.sym_sizes(), mat1.sym_strides(), mat1.layout(), alpha)
259 mat2: mm_mat2_backward(grad, mat1, mat2.sym_sizes(), mat2.sym_strides(), mat2.layout(), alpha)
263 self: maybe_multiply(grad, beta)
264 mat1: mm_mat1_sparse_backward(grad, mat1, mat2, alpha)
265 mat2: mm_mat2_backward(grad, mat1, mat2.sym_sizes(), mat2.sym_strides(), mat2.layout(), alpha)
268 self: maybe_multiply(grad, beta.conj())
269 mat: maybe_multiply(grad.ger(vec.conj()), alpha.conj())
270 vec: maybe_multiply(mat.t().conj().mv(grad), alpha.conj())
274 self: maybe_multiply(grad, beta.conj())
275 vec1: maybe_multiply(grad.mv(vec2.conj()), alpha.conj())
276 vec2: maybe_multiply(grad.t().mv(vec1.conj()), alpha.conj())
280 theta: affine_grid_generator_backward_symint(grad, size, align_corners)
283 self: grad
287 self: angle_backward(grad, self)
318 …self: "self.is_complex() ? grad * ((self + 1).rsqrt() * (self - 1).rsqrt()).conj() : grad * (self …
325 self: grad * (self.pow(2) + 1).rsqrt().conj()
332 self: grad * 1 / (1 - self.pow(2)).conj()
339 self: as_strided_backward(grad, TensorGeometry(self), size, stride, storage_offset)
343 self: as_strided_backward(grad, TensorGeometry(self), size, stride, storage_offset)
347 self: grad * (-self * self + 1).rsqrt().conj()
351 self: grad / (self * self + 1).conj()
355 self, other: atan2_backward(grad, self, other, grad_input_mask)
359 self: maybe_multiply(grad, beta.conj())
360 batch1: maybe_multiply(grad.bmm(batch2.transpose(1, 2).conj()), alpha.conj())
361 batch2: maybe_multiply(batch1.transpose(1, 2).conj().bmm(grad), alpha.conj())
365 self: zeros_like(grad)
369 self: zeros_like(grad)
374 self: zeros_like(grad)
378 self: grad.bmm(mat2.transpose(1, 2).conj())
379 mat2: self.transpose(1, 2).conj().bmm(grad)
383 self, other: matmul_backward(grad, self, other, grad_input_mask)
386 …tensors: cat_tensors_backward(grad, to_args_sizes_symint(tensors), to_args_scalartypes(tensors), d…
390 self: zeros_like(grad)
394 self: zeros_like(grad)
398 self: cholesky_backward(grad, upper, result)
401 self: cholesky_backward(grad, upper, L)
405 self, input2: cholesky_solve_backward(grad, self, input2, result, upper, grad_input_mask)
409 self: cholesky_inverse_backward(grad, self, upper, result)
415 self: clamp_backward(grad, self, min, max)
416 min, max: clamp_backward_min_max(grad, self, min, max, grad_input_mask)
420 self: clamp_backward(grad, self, min, max)
424 self: where(self >= min, grad, at::scalar_tensor(0., grad.options()))
428 self: where(self >= min, grad, at::scalar_tensor(0., grad.options()))
429 min: where(self < min, grad, at::scalar_tensor(0., grad.options()))
433 self: where(self <= max, grad, at::scalar_tensor(0., grad.options()))
437 self: where(self <= max, grad, at::scalar_tensor(0., grad.options()))
438 max: where(self > max, grad, at::scalar_tensor(0., grad.options()))
442 self: grad
446 self: grad
450 self: _to_copy_backward(grad, self.options())
453 # (If dtype IS nullopt, we rely on the regular check that any input requires grad).
457 self: grad
460 real: at::real(grad)
461 imag: at::imag(grad)
465 abs, angle: polar_backward(grad, result)
469 self: grad.conj()
473 self: grad.neg()
477 self: grad.conj_physical()
481 self: grad.conj_physical()
485 self: copysign_tensor_self_backward(grad, self, result)
490 self: copysign_tensor_self_backward(grad, self, result)
494 self: grad * -self.sin().conj()
498 self: grad * self.sinh().conj()
508 self: at::linalg_cross(other.conj(), grad, dim)
509 other: at::linalg_cross(grad, self.conj(), dim)
513 self: logcumsumexp_backward(grad, self, result, dim)
517 self: cumprod_backward(grad.to(self.scalar_type()), self, dim, result)
521 self: cumsum_backward(grad.to(self.scalar_type()), dim)
525 self: cummaxmin_backward(grad, self, indices, dim)
529 self: cummaxmin_backward(grad, self, indices, dim)
533 …self, weight, bias: "grad.defined() ? conv_tbc_backward(grad, self, weight, bias, pad) : std::tupl…
536 …log_probs: _ctc_loss_backward(grad, log_probs, targets, input_lengths, target_lengths, result0, re…
539 …log_probs: _ctc_loss_backward(grad, log_probs, targets, input_lengths, target_lengths, result0, re…
542 self: deg2rad_backward(grad)
546 A: linalg_det_backward(grad, result, A, LU, pivots)
556 tensors: block_diag_backward(grad, to_args_sizes(tensors), to_args_scalartypes(tensors))
560 self: grad.diagonal(offset, dim1, dim2)
564 self: diagonal_backward_symint(grad, self.sym_sizes(), offset, dim1, dim2)
568 grad_output: grad.diagonal(offset, dim1, dim2)
572 self: norm_backward(grad, self - other, p, result)
573 other: -norm_backward(grad, self - other, p, result)
580 self: div_tensor_self_backward(grad, other, self.scalar_type())
581 other: div_tensor_other_backward(grad, self, other)
585 self: div_tensor_self_backward(grad, other, self.scalar_type())
589 self: div_tensor_self_backward(grad, other, self.scalar_type(), rounding_mode)
590 other: div_tensor_other_backward(grad, self, other, rounding_mode)
594 self: div_tensor_self_backward(grad, other, self.scalar_type(), rounding_mode)
598 self: grad * tensor.conj()
599 tensor: grad * self.conj()
603 self: grad.conj() * other
604 other: grad * self
608 self: _fused_dropout_backward(grad, result1, p)
611 …pout_backward(grad, result1, (!train.has_value() || !train.value() ? 1 : (p == 1 ? 0.0 : 1.0 / (1.…
615 grad_output: "native_dropout_double_backward(grad, grad_output, mask, scale)"
628 self: 2.0 / sqrt(M_PI) * exp(-(self.pow(2))) * grad
632 self: -2.0 / sqrt(M_PI) * exp(-(self.pow(2))) * grad
636 self: (2.0 * self * result - 2.0 / sqrt(M_PI)) * grad
640 self: 0.5 * sqrt(M_PI) * exp(self.erfinv().pow(2)) * grad
644 self: grad * result.conj()
648 self: grad * result.conj() * M_LN2
652 self: grad * (result.conj() + 1)
657 self: at::sum_to(grad, self.sym_sizes())
661 self: zeros_like(grad)
665 self: fake_quantize_per_tensor_affine_cachemask_backward(grad, mask)
668 self: fake_quantize_per_tensor_affine_cachemask_backward(grad, mask)
671 …self, scale, zero_point: "grad.defined() ? _fake_quantize_learnable_per_tensor_affine_backward(gra…
674 self: fake_quantize_per_channel_affine_cachemask_backward(grad, mask)
677 …self, scale, zero_point: "grad.defined() ? _fake_quantize_learnable_per_channel_affine_backward(gr…
680 self: fake_quantize_per_tensor_affine_cachemask_backward(grad, mask)
683 self: zeros_like(grad)
687 self: zeros_like(grad)
688 value: grad.sum()
692 self: zeros_like(grad)
696 self: zeros_like(grad)
697 value: grad.sum()
701 self: zeros_like(grad)
705 self: grad
709 self: grad
710 other: -grad * self.div(other, /*rounding_mode=*/"trunc")
714 self: grad
718 self: grad / exponent.exp2()
722 self: gather_backward(grad, self, dim, index, sparse_grad)
736 self: zeros_like(grad)
761 …input, grid: "grad.defined() ? grid_sampler_2d_backward(grad, input, grid, interpolation_mode, pad…
764 …input, grid: "grad.defined() ? grid_sampler_3d_backward(grad, input, grid, interpolation_mode, pad…
768 …input, grid: "grad.defined() ? _grid_sampler_2d_cpu_fallback_backward(grad, input, grid, interpola…
780 self: hardsigmoid_backward(grad, self)
787 self: hardswish_backward(grad, self)
791 grad_output: hardswish_backward(grad, self)
792 …self: at::where(at::logical_and(-3.0 < self, self < 3.0), grad * grad_output / 3.0, at::zeros({}, …
797 self: grad * self / result
798 other: grad * other / result
802 self: grad * at::special_i1(self)
806 self: grad * (at::special_i1e(self) - self.sgn() * result)
810 self: i1_backward(grad, self, result)
814 self: i1e_backward(grad, self, result)
819 other: grad * exp((self - 1) * log(other) - other - lgamma(self))
823 other: -grad * exp((self - 1) * log(other) - other - lgamma(self))
826 self: index_backward(grad.new_zeros_symint(self.sym_sizes(), self.options()), indices, grad)
830 …self: at::_unsafe_index_put(grad.new_zeros_symint(self.sym_sizes(), self.options()), indices, grad…
834 self: grad
838 …source: "maybe_multiply(source.dim() > 0 ? grad.index_select(dim, index).expand_as(source) : grad.…
843 self, source: index_reduce_backward(grad, self, dim, index, source, reduce, include_self, result)
847 self: grad.index_fill(dim, index, 0)
851 …source: "source.dim() > 0 ? grad.index_select(dim, index).expand_as(source) : grad.index_select(di…
856 self: grad.index_fill(dim, index, 0)
861 self: grad.index_fill(dim, index, 0)
862 value: grad.index_select(dim, std::get<0>(at::_unique(index, /*sorted=*/false))).sum()
867 self: "accumulate ? grad : grad.index_put(indices, zeros_like(values), false)"
868 values: grad.index(indices)
872 self: "accumulate ? grad : at::_unsafe_index_put(grad, indices, zeros_like(values), false)"
873 values: at::_unsafe_index(grad, indices)
877 self: "accumulate ? grad : grad.index_put(indices, zeros_like(values), false)"
878 values: grad.index(indices)
882 self: index_select_backward_symint(grad, self.sym_sizes(), dim, index)
887 A: -at::matmul(inverse.mH(), at::matmul(grad, inverse.mH()))
892 self: pinv_backward(grad, result, self)
899 self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), keepdim)
912 …self: "weight.isComplex() ? grad * (1 - weight.conj().toComplexDouble()) : grad * (1 - weight.toDo…
913 end: grad * weight.conj()
917 self: grad * (1 - weight).conj()
918 end: grad * weight.conj()
919 weight: grad * (end - self).conj()
923 self: grad * digamma(self)
927 self: grad * polygamma(1, self)
931 self: grad * polygamma(n + 1, self)
935 self: grad * polygamma(n + 1, self)
939 self: grad.div(self.conj())
943 self: grad / (self.conj() * 2.3025850929940456)
947 self: log1p_backward(grad, self)
951 self: grad / (self.conj() * 0.6931471805599453)
955 self: grad / (1 + exp(other - self)).conj()
956 other: grad / (1 + exp(self - other)).conj()
960 self: grad / (1 + pow(2, other - self))
961 other: grad / (1 + pow(2, self - other))
973 self: at::xlogy(grad, other).masked_fill((self == 0.) & (other <= 0.), 0.)
974 other: grad * self / other
978 other: grad * self / other
983 ? at::xlogy(grad, other)
984 : at::xlogy(grad, other).masked_fill(self == 0., 0.)"
990 self: at::special_xlog1py(grad, other).masked_fill((self == 0.) & (other <= -1.), 0.)
991 other: grad * self / (other + 1)
995 other: grad * self / (other + 1)
1000 ? at::special_xlog1py(grad, other)
1001 : at::special_xlog1py(grad, other).masked_fill(self == 0., 0.)"
1006 other: grad * -self * special_zeta(self + 1., other)
1009 other: grad * -self * special_zeta(self.toDouble() + 1., other)
1015 self: zeros_like(grad)
1019 self: logsumexp_backward(grad, self, result, dim, keepdim)
1023 self, b: linalg_lstsq_backward(grad, self, b, grad_input_mask)
1037 A: lu_factor_ex_backward(grad, LU, pivots, pivot)
1048 LU: linalg_lu_solve_LU(grad, LU, pivots, result, left, adjoint)
1049 B: "at::linalg_lu_solve(LU, pivots, grad, left, !adjoint)"
1060 self: grad.masked_fill(mask, 0)
1065 self: grad.masked_fill(mask, 0)
1066 value: masked_fill_backward(grad, mask)
1071 self: grad.masked_fill(mask, 0)
1072 source: masked_scatter_backward_symint(grad, mask, source.sym_sizes())
1077 grad_output: zeros_like(grad_output).masked_scatter(mask, grad)
1082 self: masked_select_backward(grad, self, mask)
1087 self: linalg_matrix_exp_differential(self, grad, /*adjoint*/ true)
1091 self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), keepdim)
1095 self: evenly_distribute_backward(grad, self, result)
1099 self: at::where(self == other, grad / 2, grad).masked_fill_(self < other, 0)
1100 other: at::where(self == other, grad / 2, grad).masked_fill_(self > other, 0)
1104 self: grad.masked_fill((self >= other).logical_or_(other.isnan()).logical_not_(), 0)
1105 other: grad.masked_fill((self >= other).logical_or_(other.isnan()), 0)
1109 self: grad.expand_symint(self.sym_sizes()) / self.sym_numel()
1113 self: mean_backward(grad, self.sym_sizes(), dim, self.sym_numel(), keepdim)
1117 self: evenly_distribute_backward(grad, self, result)
1121 self: evenly_distribute_backward(grad, self, result)
1139 self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), keepdim)
1143 self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), keepdim)
1147 self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), keepdim)
1151 self: evenly_distribute_backward(grad, self, result)
1155 self: at::where(self == other, grad / 2, grad).masked_fill_(self > other, 0)
1156 other: at::where(self == other, grad / 2, grad).masked_fill_(self < other, 0)
1160 self: grad.masked_fill((self <= other).logical_or_(other.isnan()).logical_not_(), 0)
1161 other: grad.masked_fill((self <= other).logical_or_(other.isnan()), 0)
1165 …self: scale_grad_by_count(restore_reduced_dims(grad, dim, keepdim), restore_reduced_dims(result, d…
1169 …self: scale_grad_by_count(restore_reduced_dims(grad, dim, keepdim), restore_reduced_dims(result, d…
1173 self: mm_mat1_backward(grad, mat2, self.sym_sizes(), self.sym_strides(), self.layout(), 1)
1174 mat2: mm_mat2_backward(grad, self, mat2.sym_sizes(), mat2.sym_strides(), mat2.layout(), 1)
1178 self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), keepdim)
1182 self: mul_tensor_backward(grad, other, self.scalar_type())
1183 other: mul_tensor_backward(grad, self, other.scalar_type())
1187 self: mul_tensor_backward(grad, other, self.scalar_type())
1191 self: grad.ger(vec.conj())
1192 vec: self.conj().t().mv(grad)
1196 self: mvlgamma_backward(grad, self, p)
1200 self: grad * at::isfinite(self)
1204 …input, weight, bias: "grad.defined() ? native_batch_norm_backward(grad, input, weight, running_mea…
1208 …input, weight, bias: "grad.defined() ? native_batch_norm_backward(grad, input, weight, running_mea…
1212 …input, weight, bias: "grad.defined() ? native_batch_norm_backward(grad, input, weight, running_mea…
1216 …input, weight, bias: "grad.defined() ? native_batch_norm_backward(grad, input, weight, Tensor(), T…
1225 …input, weight, bias: "grad.defined() ? native_layer_norm_backward_symint(grad, input, normalized_s…
1250 self: grad.neg()
1254 …input, weight, bias: "grad.defined() ? batch_norm_backward(grad, input, weight, running_mean, runn…
1258 …input, weight, bias: "grad.defined() ? batch_norm_backward(grad, input, weight, running_mean, runn…
1272 self: norm_backward(grad, self, p, result)
1276 self: norm_backward(grad, self, p, result, dim, keepdim)
1280 self: norm_backward(grad, self.to(grad.scalar_type()), p, result)
1284 self: norm_backward(grad, self.to(grad.scalar_type()), p, result, dim, keepdim)
1288 self: linalg_vector_norm_backward(grad, self, ord, result, dim, keepdim)
1292 self: _pdist_backward(grad, self, p, result)
1294 - name: _pdist_backward(Tensor grad, Tensor self, float p, Tensor pdist) -> Tensor
1295 grad: not_implemented("_pdist_backward")
1300 x1, x2: _euclidean_dist_backward(grad, x1, x2, result)
1303 x1: _cdist_backward(grad.contiguous(), x1, x2, p, result)
1304 x2: _cdist_backward(grad.mT().contiguous(), x2, x1, p, result.mT().contiguous())
1306 - name: _cdist_backward(Tensor grad, Tensor x1, Tensor x2, float p, Tensor cdist) -> Tensor
1307 grad: not_implemented("_cdist_backward")
1313 self: zeros_like(grad)
1317 mean: at::zeros_symint(mean.sym_sizes(), grad.options())
1321 std: at::zeros_symint(std.sym_sizes(), grad.options())
1325 mean: at::zeros_symint(mean.sym_sizes(), grad.options())
1326 std: at::zeros_symint(std.sym_sizes(), grad.options())
1330 input, tau: householder_product_backward(grad, result, input, tau)
1334 …self, input2, input3: ormqr_backward(grad, result, self, input2, input3, left, transpose, grad_inp…
1337 self: permute_backwards(grad, dims)
1345 self: pow_backward(grad, self, exponent)
1349 self: pow_backward_self(grad, self, exponent)
1350 exponent: pow_backward_exponent(grad, self, exponent, result)
1354 exponent: pow_backward_exponent(grad, self, exponent, result)
1358 self: prod_backward(grad, self.to(grad.scalar_type()), result)
1362 self: prod_backward(grad, self.to(grad.scalar_type()), result, dim, keepdim)
1366 self: "accumulate ? grad : grad.put(index, zeros_like(source), false)"
1368 source: grad.take(index).reshape_as(source)
1376 self: rad2deg_backward(grad)
1380 self: zeros_like(grad)
1384 self: zeros_like(grad)
1388 self: zeros_like(grad)
1392 self: -grad * (result * result).conj()
1396 self: grad
1400 self: grad
1401 other: -grad * self.div(other, /*rounding_mode=*/"floor")
1405 self: renorm_backward(grad, self, p, dim, maxnorm)
1409 self: repeat_backward(grad, repeats, self.sym_sizes())
1413 self: grad * (-(1 + self.log()))
1417 self: grad * std::sqrt(2 * M_PI) * (result.square() / 2).exp()
1421 self: grad / std::sqrt(2 * M_PI) * (result + self.pow(2) / 2).neg().exp()
1437 self: grad.reshape_symint(self.sym_sizes())
1441 self: zeros_like(grad)
1445 self: zeros_like(grad)
1449 self: -0.5 * grad * result.pow(3).conj()
1453 self: grad.scatter(dim, index, 0)
1455 src: grad.gather(dim, index)
1459 self: grad.scatter(dim, index, 0)
1464 self: grad
1466 src: grad.gather(dim, index)
1472 self: select_backward_symint(grad, self.sym_sizes(), dim, index)
1475 self: _nested_select_backward_symint(grad, self, dim, index)
1478 grad_output: grad.select_symint(dim, index)
1482 self: sigmoid_backward(grad, result)
1486 …:is_enabled() ? infinitely_differentiable_logit_backward(grad, self, eps) : logit_backward(grad, s…
1490 self: zeros_like(grad)
1494 self: sgn_backward(self, grad, result)
1501 self: grad * self.cos().conj()
1505 self: sinc_backward(grad, self)
1509 self: grad * self.cosh().conj()
1513 self: slice_backward_wrapper(grad, self.sym_sizes(), dim, start, end, step)
1517 grad_output: grad.slice_symint(dim, start, end, step)
1521 self: grad.slice_symint(dim, start, end, step)
1522 src: slice_scatter_symint(grad, zeros_like(self), dim, start, end, step)
1526 self: slice_scatter_symint(grad, zeros_like(src), dim, start, end, step)
1527 src: grad.slice_symint(dim, start, end, step)
1531 self: select_scatter_symint(grad, zeros_like(src), dim, index)
1532 src: grad.select_symint(dim, index)
1536 self: diagonal_scatter(grad, zeros_like(src), offset, dim1, dim2)
1537 src: grad.diagonal(offset, dim1, dim2)
1541 …self: as_strided_scatter_backward(grad, TensorGeometry(self), TensorGeometry(src), size, stride, s…
1543 src: grad.contiguous().as_strided_symint(size, stride, storage_offset)
1547 A, B: linalg_solve_backward(grad, result, A, LU, pivots, left, grad_input_mask[1])
1552 self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), true)
1557 self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), true)
1582 self: grad / (2 * result.conj())
1586 self: unsqueeze_to(grad, self.sym_sizes())
1592 self: unsqueeze_to(grad, dim, self.sym_sizes())
1595 self: grad.unsqueeze(dim)
1600 self: unsqueeze_to(grad, dim, self.sym_sizes())
1603 self: unsqueeze_multiple(grad, dim, self.dim())
1606 self: unsqueeze_to(grad, self.sym_sizes())
1610 self: unsqueeze_to(grad, dim, self.sym_sizes())
1614 self: unsqueeze_to(grad, dim, self.sym_sizes())
1618 self: std_backward(result, grad, self, dim, correction, keepdim)
1629 self: handle_r_to_c(self.scalar_type(), grad)
1630 other: handle_r_to_c(other.scalar_type(), maybe_multiply(-grad, alpha.conj()))
1634 self: handle_r_to_c(self.scalar_type(), grad)
1638 self: handle_r_to_c(self.scalar_type(), maybe_multiply(-grad, alpha.conj()))
1639 other: handle_r_to_c(other.scalar_type(), grad)
1643 self: handle_r_to_c(self.scalar_type(), maybe_multiply(-grad, alpha.conj()))
1647 self: grad.expand_symint(self.sym_sizes())
1653 self: sum_backward(grad, self.sym_sizes(), dim, keepdim)
1657 self: _nested_sum_backward(grad, self, dim, keepdim)
1660 self: nansum_backward(grad.to(self.scalar_type()), self, dim, keepdim)
1682 self: grad.t()
1686 self: grad.t()
1693 self: grad.flip(dims)
1697 …self: grad.roll_symint(fmap(reverse_list_symint(shifts), [](c10::SymInt i){return -i;}), reverse_l…
1701 self: grad.rot90(-k, dims)
1705 self: take_backward(grad, self, index)
1710 self: grad * (1 + result.pow(2)).conj()
1714 self: tanh_backward(grad, result)
1718 self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), true)
1723 self: trace_backward_symint(grad, self.sym_sizes())
1727 self: grad.transpose(dim0, dim1)
1731 self: grad.transpose(dim0, dim1)
1740 …self, B: linalg_solve_triangular_backward(grad, self, result, upper, left, unitriangular, grad_inp…
1744 self: grad.tril(diagonal)
1748 self: grad.triu(diagonal)
1752 self: zeros_like(grad)
1760 self: to_dense_backward(grad, self, masked_grad)
1767 self: to_sparse_backward(grad, self.layout(), self.sym_blocksize())
1774 self: to_sparse_backward(grad, self.layout(), self.sym_blocksize())
1781 self: to_sparse_backward(grad, self.layout(), self.sym_blocksize())
1788 self: to_sparse_backward(grad, self.layout(), self.sym_blocksize())
1795 self: to_sparse_backward(grad, self.layout(), self.sym_blocksize())
1802 self: to_sparse_backward(grad, self.layout(), self.sym_blocksize())
1805 self: to_mkldnn_backward(grad, self)
1808 self: unfold_backward_symint(grad, self.sym_sizes(), dimension, size, step)
1812 grad_in: grad.unfold(dim, size, step)
1816 self: zeros_like(grad)
1840 self: grad.reshape_symint(self.sym_sizes())
1844 self: grad
1848 self: grad
1852 self: grad.squeeze(dim)
1856 self: grad.squeeze(dim)
1860 self: var_backward(grad, self, dim, correction, keepdim)
1873 self: grad.reshape_symint(self.sym_sizes())
1876 self: grad.reshape_as(self)
1883 self: at::view_as_complex(grad.contiguous()) # gx0 + 1j * gx1
1887 self: at::view_as_real(grad.contiguous().resolve_conj()) # [gx, gy]
1892 self: where(condition, grad, 0)
1893 other: where(condition, 0, grad)
1900 …grad.defined() ? (GradMode::is_enabled() ? _weight_norm_differentiable_backward(grad.contiguous(),…
1903 self: zeros_like(grad)
1907 self: sparse_mask_backward(grad, mask, self.layout())
1912 values: grad.sparse_mask(result)._values()
1918 values: grad.to_dense().sparse_mask(result).values()
1921 self: at::_sparse_sum_backward(grad, self, dim)
1924 self: grad * _standard_gamma_grad(self, result)
1932 self: values_backward(grad, self)
1934 …self: at::_nested_view_from_buffer(grad.contiguous(), self._nested_tensor_size(), self._nested_ten…
1943 i1, i2, i3: "_trilinear_backward(grad,
1953 self: constant_pad_nd_backward(grad, pad)
1957 self: binary_cross_entropy_backward(grad, self, target, weight, reduction)
1958 target: binary_cross_entropy_target_backward(grad, self, target, weight, reduction)
1965 self: binary_cross_entropy_double_backward(grad_output, grad, self, target, weight, reduction)
1966 …target: binary_cross_entropy_double_backward_target(grad, grad_output, self, target, weight, reduc…
1967 …grad_output: binary_cross_entropy_double_backward_grad_output(grad, self, target, weight, reductio…
1973 self: binary_cross_entropy_with_logits_backward(grad, self, target, weight, pos_weight, reduction)
1974 …target: binary_cross_entropy_with_logits_target_backward(grad, self, target, weight, pos_weight, r…
1982 …weight: embedding_backward_symint(grad, indices, weight.sym_size(0), padding_idx, scale_grad_by_fr…
1986 grad_output: embedding_dense_double_backward_symint(grad, indices, padding_idx)
1993 …weight: _embedding_bag_backward_symint(grad, indices, offsets, result1, result2, result3, weight.s…
1994 …per_sample_weights: _embedding_bag_per_sample_weights_backward(grad, weight, indices, offsets, res…
1996 - name: _embedding_bag_dense_backward(Tensor grad, Tensor indices, Tensor offset2bag, Tensor bag_si…
2007 self: mse_loss_backward(grad, self, target, reduction)
2008 target: mse_loss_backward(grad, target, self, reduction)
2012 self: multi_margin_loss_backward(grad, self, target, p, margin, weight, reduction)
2016 self: multilabel_margin_loss_backward(grad, self, target, reduction, is_target)
2020 self: nll_loss_backward_symint(grad, self, target, weight, reduction, ignore_index, total_weight)
2025 …self: nll_loss2d_backward_symint(grad, self, target, weight, reduction, ignore_index, total_weight)
2030 self: smooth_l1_loss_backward(grad, self, target, reduction, beta)
2031 target: smooth_l1_loss_backward(grad, target, self, reduction, beta)
2035 self: huber_loss_backward(grad, self, target, reduction, delta)
2036 target: huber_loss_backward(grad, target, self, reduction, delta)
2040 self: soft_margin_loss_backward(grad, self, target, reduction)
2044 self: threshold_backward(grad, result, 0)
2048 …ode::is_enabled() ? infinitely_differentiable_silu_backward(grad, self) : silu_backward(grad, self…
2052 …ode::is_enabled() ? infinitely_differentiable_mish_backward(grad, self) : mish_backward(grad, self…
2056 self: elu_backward(grad, alpha, scale, input_scale, /* is_result */ false, self)
2060 self: elu_backward(grad, alpha, scale, input_scale, /* is_result */ true, result)
2064 self: elu_backward(grad, alpha, 1, 1.0/alpha.toFloat(), /* is_result */ false, self)
2068 self: elu_backward(grad, alpha, 1, 1.0/alpha.toFloat(), /* is_result */ true, result)
2072 self: gelu_backward(grad, self, approximate)
2076 grad_output: gelu_backward(grad, self, approximate)
2077 self: gelu_double_backward(grad, grad_output, self, approximate)
2083 self: glu_backward(grad, self, dim)
2087 self: hardshrink_backward(grad, self, lambd)
2091 grad_out: hardshrink_backward(grad, self, lambd)
2092 self: zeros_like(grad)
2096 self: hardtanh_backward(grad, self, min_val, max_val)
2100 self: leaky_relu_backward(grad, self, negative_slope, false)
2104 self: leaky_relu_backward(grad, result, negative_slope, true)
2108 self: log_sigmoid_backward(grad, self, buffer)
2113 self: _log_softmax_backward_data(grad, result, dim, self.scalar_type())
2117 self: _sparse_log_softmax_backward_data(grad, result, dim, self)
2120 self: _masked_softmax_backward(grad, result, mask, dim)
2124 …self, weight: "grad.defined() ? _prelu_kernel_backward(grad, self, weight) : std::tuple<Tensor, Te…
2138 self: rrelu_with_noise_backward(grad, self, noise, lower, upper, training, false)
2142 self: rrelu_with_noise_backward(grad, result, noise, lower, upper, training, true)
2145 self: _softmax_backward_data(grad, result, dim, self.scalar_type())
2149 self: _sparse_softmax_backward_data(grad, result, dim, self)
2152 self: sparse_sparse_matmul_backward(grad, self, other, 0)
2153 other: sparse_sparse_matmul_backward(grad, self, other, 1)
2156 self: softplus_backward(grad, self, beta, threshold)
2160 self: softshrink_backward(grad, self, lambd)
2164 self: threshold_backward(grad, self, threshold)
2168 self: threshold_backward(grad, self, threshold)
2172 self: reflection_pad1d_backward_symint(grad, self, padding)
2176 self: reflection_pad2d_backward_symint(grad, self, padding)
2180 self: reflection_pad3d_backward_symint(grad, self, padding)
2184 self: replication_pad1d_backward_symint(grad, self, padding)
2188 self: replication_pad2d_backward_symint(grad, self, padding)
2192 self: replication_pad3d_backward_symint(grad, self, padding)
2196 …self: upsample_linear1d_backward_symint(grad, output_size, self.sym_sizes(), align_corners, scales)
2200 …self: upsample_bilinear2d_backward_symint(grad, output_size, self.sym_sizes(), align_corners, scal…
2204 …self: _upsample_bilinear2d_aa_backward_symint(grad, output_size, self.sym_sizes(), align_corners, …
2208 …self: upsample_bicubic2d_backward_symint(grad, output_size, self.sym_sizes(), align_corners, scale…
2212 …self: _upsample_bicubic2d_aa_backward_symint(grad, output_size, self.sym_sizes(), align_corners, s…
2216 …self: upsample_trilinear3d_backward_symint(grad, output_size, self.sym_sizes(), align_corners, sca…
2220 self: upsample_nearest1d_backward_symint(grad, output_size, self.sym_sizes(), scales)
2224 self: _upsample_nearest_exact1d_backward_symint(grad, output_size, self.sym_sizes(), scales)
2228 self: upsample_nearest2d_backward_symint(grad, output_size, self.sym_sizes(), scales_h, scales_w)
2232 …self: _upsample_nearest_exact2d_backward_symint(grad, output_size, self.sym_sizes(), scales_h, sca…
2236 …self: upsample_nearest3d_backward_symint(grad, output_size, self.sym_sizes(), scales_d, scales_h, …
2240 …self: _upsample_nearest_exact3d_backward_symint(grad, output_size, self.sym_sizes(), scales_d, sca…
2244 self: pixel_unshuffle(grad, upscale_factor)
2248 self: pixel_shuffle(grad, downscale_factor)
2252 self: _adaptive_avg_pool2d_backward(grad, self)
2256 self: _adaptive_avg_pool3d_backward(grad, self)
2260 self: adaptive_max_pool2d_backward(grad, self, result1)
2265 self: adaptive_max_pool3d_backward(grad, self, result1)
2270 …self: avg_pool2d_backward(grad, self, kernel_size, stride, padding, ceil_mode, count_include_pad, …
2274 …self: avg_pool3d_backward(grad, self, kernel_size, stride, padding, ceil_mode, count_include_pad, …
2278 self: fractional_max_pool2d_backward(grad, self, kernel_size, output_size, result1)
2283 self: fractional_max_pool3d_backward(grad, self, kernel_size, output_size, result1)
2288 …input, weight, bias: "grad.defined() ? linear_backward(input, grad, weight, grad_input_mask) : std…
2295 self: max_pool2d_backward(grad, self, kernel_size, stride, padding, dilation, ceil_mode)
2298 …self, weight, bias: "grad.defined() ? mps_convolution_backward_symint(self, grad, weight, padding,…
2304 …self: max_pool2d_with_indices_backward(grad, self, kernel_size, stride, padding, dilation, ceil_mo…
2309 …self: max_pool3d_with_indices_backward(grad, self, kernel_size, stride, padding, dilation, ceil_mo…
2314 self: max_pool_double_backward(grad, indices, 2)
2319 self: max_pool_double_backward(grad, indices, 3)
2324 …input, weight, bias: "grad.defined() ? convolution_backward_symint(grad, input, weight, bias->sym_…
2331 …input, weight, bias: "grad.defined() ? convolution_backward_symint(grad, input, weight, bias->sym_…
2341 …input, weight, bias: "grad.defined() ? convolution_backward_overrideable_symint(grad, input, weigh…
2347 …self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_si…
2350 …self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_si…
2353 …self, weight, bias: "grad.defined() ? _slow_conv2d_backward_symint(grad, self, weight, kernel_size…
2359 …self, weight, bias: "grad.defined() ? convolution_backward_symint(grad.contiguous(), self, weight,…
2362 …self, weight, bias: "grad.defined() ? convolution_backward_symint(grad.contiguous(), self, weight,…
2365 …self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_si…
2368 …self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_si…
2371 …self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_si…
2374 self: im2col(grad, kernel_size, dilation, padding, stride)
2378 …self: col2im_symint(grad, {self.sym_size(-2), self.sym_size(-1)}, kernel_size, dilation, padding, …
2382 …grad_output: _adaptive_avg_pool2d_symint(grad, {grad_output.sym_size(-2), grad_output.sym_size(-1)…
2387 …grad_output: _adaptive_avg_pool3d_symint(grad, { grad_output.sym_size(-3), grad_output.sym_size(-2…
2392 grad_output: max_pool_double_backward(grad, indices, 2)
2397 grad_output: max_pool_double_backward(grad, indices, 3)
2402 …grad_output: avg_pool2d(grad, kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_…
2407 …grad_output: avg_pool3d(grad, kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_…
2412 grad_output: elu_backward(grad, alpha, scale, input_scale, is_result, self_or_result)
2413 …self_or_result: elu_double_backward(grad, grad_output, alpha, scale, input_scale, is_result, self_…
2417 grad_output: max_pool_double_backward(grad, indices, 2)
2422 grad_output: max_pool_double_backward(grad, indices, 3)
2427 grad_output: glu_double_backward_grad_output(grad, self, dim)
2428 self: glu_double_backward(grad, grad_output, self, dim)
2432 grad_output: hardtanh_backward(grad, self, min_val, max_val)
2433 self: zeros_like(grad)
2437 grad_output: log_sigmoid_backward(grad, self, buffer)
2438 self: log_sigmoid_double_backward(grad * grad_output, self)
2442 grad_output: grad.to(output.dtype()) - (grad.to(output.dtype()) * output.exp()).sum(dim, true)
2443 output: (-grad_output.sum(dim, true) * output.exp() * grad.to(output.dtype())).to(output.dtype())
2447 grad_output: leaky_relu_backward(grad, self, negative_slope, false)
2448 self: zeros_like(grad)
2475 grad_output: max_pool_double_backward(grad, indices, 2)
2481 grad_output: max_pool_double_backward(grad, indices, 3)
2487 grad_output: mse_loss_backward(grad, self, target, reduction)
2488 self: mse_loss_double_backward(grad * grad_output, self, reduction)
2489 target: -mse_loss_double_backward(grad * grad_output, target, reduction)
2496 grad_output: nll_loss_symint(grad, target, weight, reduction, ignore_index)
2497 self: zeros_like(grad)
2501 grad_output: nll_loss2d_symint(grad, target, weight, reduction, ignore_index)
2502 self: zeros_like(grad)
2507 grad_output: rrelu_with_noise_backward(grad, self, noise, lower, upper, training, false)
2508 self: zeros_like(grad)
2512 grad_output: reflection_pad1d_symint(grad, padding)
2517 grad_output: reflection_pad2d_symint(grad, padding)
2522 grad_output: reflection_pad3d_symint(grad, padding)
2527 grad_output: replication_pad1d_symint(grad, padding)
2532 grad_output: replication_pad2d_symint(grad, padding)
2537 grad_output: replication_pad3d_symint(grad, padding)
2542 self, mat1, mat2: "sparse_sampled_addmm_backward(grad,
2550 …self, other: "grad.defined() ? _sparse_mm_reduce_impl_backward(self, grad, other, reduce, result1,…
2553 grad_output: smooth_l1_loss_backward(grad, self, target, reduction, beta)
2554 self: smooth_l1_loss_double_backward(grad * grad_output, self, target, reduction, beta)
2555 target: -smooth_l1_loss_double_backward(grad * grad_output, self, target, reduction, beta)
2562 …grad_output: huber_loss_double_backward_grad_output(grad, grad_output, self, target, reduction, de…
2563 self: huber_loss_double_backward(grad * grad_output, self, target, reduction, delta)
2564 target: -huber_loss_double_backward(grad * grad_output, self, target, reduction, delta)
2567 grad_output: softplus_backward(grad, self, beta, threshold)
2568 self: softplus_double_backward(grad * grad_output, self, beta, threshold)
2573 grad_output: _softmax_backward_data(grad.to(output.dtype()), output, dim, input_dtype)
2574 …output: softmax_double_backward(grad.to(output.dtype()), grad_output, dim, output).to(output.dtype…
2577 …grad_output: soft_margin_loss_double_backward_grad_output(grad, grad_output, self, target, reducti…
2578 self: soft_margin_loss_double_backward(grad * grad_output, self, target, reduction)
2581 grad_output: softshrink_backward(grad, self, lambd)
2582 self: zeros_like(grad)
2586 grad_output: threshold_backward(grad, self, threshold)
2587 self: zeros_like(grad)
2591 grad_output: upsample_linear1d_symint(grad, output_size, align_corners, scales)
2595 grad_output: upsample_bilinear2d_symint(grad, output_size, align_corners, scales_h, scales_w)
2599 grad_output: _upsample_bilinear2d_aa_symint(grad, output_size, align_corners, scales_h, scales_w)
2603 grad_output: upsample_bicubic2d_symint(grad, output_size, align_corners, scales_h, scales_w)
2607 grad_output: _upsample_bicubic2d_aa_symint(grad, output_size, align_corners, scales_h, scales_w)
2611 …grad_output: upsample_trilinear3d_symint(grad, output_size, align_corners, scales_d, scales_h, sca…
2615 grad_output: upsample_nearest1d_symint(grad, output_size, scales)
2619 grad_output: _upsample_nearest_exact1d_symint(grad, output_size, scales)
2623 grad_output: upsample_nearest2d_symint(grad, output_size, scales_h, scales_w)
2627 grad_output: _upsample_nearest_exact2d_symint(grad, output_size, scales_h, scales_w)
2631 grad_output: upsample_nearest3d_symint(grad, output_size, scales_d, scales_h, scales_w)
2635 grad_output: _upsample_nearest_exact3d_symint(grad, output_size, scales_d, scales_h, scales_w)
2639 grad_output: sigmoid_backward(grad, output.conj())
2640 output: grad.conj() * grad_output * (-2 * output.conj() + 1)
2644 grad_output: tanh_backward(grad, output.conj())
2645 output: grad.conj() * (-2 * output.conj() * grad_output)
2650 log_probs: _cudnn_ctc_loss_backward(grad, result0, result1, zero_infinity)
2653 log_probs: _cudnn_ctc_loss_backward(grad, result0, result1, zero_infinity)
2656 …self, weight: "_cudnn_convolution_backward(self, grad, weight, padding, output_padding, stride, di…
2659 …self, weight: "grad.defined() ? mps_convolution_transpose_backward_symint(self, grad, weight, padd…
2662 …self, weight: "_cudnn_convolution_backward(self, grad, weight, padding, std::vector<c10::SymInt>(p…
2665 …self, grid: "grad.defined() ? cudnn_grid_sampler_backward(self, grid, grad) : std::tuple<Tensor, T…
2668 theta: cudnn_affine_grid_generator_backward(grad, N, C, H, W)
2678 …grad.defined() ? (training ? cudnn_batch_norm_backward(input, grad.contiguous(input.suggest_memory…
2694 …input, weight, bias: "grad.defined() ? convolution_backward_symint(grad, input, weight, bias->sym_…
2726 …self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_si…
2729 …self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_si…
2732 …self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_si…
2735 …grad.defined() ? (training ? miopen_batch_norm_backward(input, grad.contiguous(), weight, running_…
2759 …self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_si…
2762 self, weight, bias: mkldnn_linear_backward(self, grad, weight, grad_input_mask)
2765 …self: mkldnn_max_pool2d_backward(grad, result, self, kernel_size, stride, padding, dilation, ceil_…
2768 …self: mkldnn_max_pool3d_backward(grad, result, self, kernel_size, stride, padding, dilation, ceil_…
2771 self: mkldnn_adaptive_avg_pool2d_backward(grad, self)
2774 self: grad.reshape_symint(self.sym_sizes())
2778 list: "grad.defined()? at::unbind(grad) : std::vector<Tensor>(list.size())"
2781 t: grad.to_padded_tensor_symint(0, t.sym_sizes())
2785 padded: _nested_from_padded_backward(grad, padded, fuse_transform_0213)
2789 self: at::_nested_from_padded(grad, self._nested_tensor_size())
2793 self: grad.values()
2798 self: grad.values()
2804 …self: _nested_view_from_jagged(grad, at::_nested_get_offsets(self), at::_nested_get_jagged_dummy(s…
2809 …query, key, value, attn_bias: _scaled_dot_product_efficient_attention_backward(grad, query, key, v…
2813 …query, key, value: _scaled_dot_product_flash_attention_backward_symint(grad, query, key, value, ou…
2817 …query, key, value: _scaled_dot_product_flash_attention_for_cpu_backward(grad, query, key, value, o…
2821 …query, key, value: _flash_attention_backward_symint(grad, query, key, value, output, softmax_logsu…
2825 …query, key, value, bias: _efficient_attention_backward_symint(grad, query, key, value, bias, outpu…
2829 …query, key, value: _scaled_dot_product_cudnn_attention_backward_symint(grad, query, key, value, ou…
2833 self: fft_r2c_backward(grad, dim, normalization, onesided, self.sym_size(dim.back()))
2837 self: fft_c2r_backward(grad, dim, normalization)
2841 self: _fft_c2c_symint(grad, dim, normalization, !forward)
2854 tensors: stack_tensors_backward(grad, dim, to_args_scalartypes(tensors))
2866 …grad.defined() ? (GradMode::is_enabled() ? _thnn_differentiable_gru_cell_backward(grad, input_gate…
2870 input: _pack_padded_sequence_backward_symint(grad, input.sym_sizes(), result1, batch_first)
2916 data: _segment_reduce_backward(grad, result, data, reduce, lengths, offsets, axis, initial)
2919 self: grad
2927 self: warn_backwards(grad)
2932 self: grad.expand_symint(self.sym_sizes()) + 1
2935 self: grad.mul(grad)
2937 self: grad.expand_symint(self.sym_sizes()) * 2
2942 self: grad.mul(grad).add(grad)
2947 self: grad.reshape_as(self)
2949 self: grad.reshape_as(self) + 1
2955 self, src: scatter_reduce_backward(grad, self, dim, index, src, reduce, include_self, result)
3116 self: grad.reshape_symint(self.sym_sizes())