#pragma once

#include <torch/csrc/autograd/variable.h>

namespace torch::autograd {

/// Computes the sum of gradients of given tensors with respect to graph
/// leaves.
///
/// The graph is differentiated using the chain rule. If any of ``tensors``
/// are non-scalar (i.e. their data has more than one element) and require
/// gradient, then the Jacobian-vector product is computed; in this case the
/// function additionally requires specifying `grad_tensors`. It should be a
/// sequence of matching length that contains the "vector" in the
/// Jacobian-vector product, usually the gradient of the differentiated
/// function w.r.t. the corresponding tensors (`torch::Tensor()` is an
/// acceptable value for all tensors that don't need gradient tensors).
///
/// This function accumulates gradients in the leaves; you might need to zero
/// them before calling it.
///
/// \param tensors Tensors of which the derivative will be computed.
/// \param grad_tensors The "vector" in the Jacobian-vector product, usually
///     gradients w.r.t. each element of the corresponding tensors.
///     `torch::Tensor()` values can be specified for scalar Tensors or ones
///     that don't require grad. If a `torch::Tensor()` value would be
///     acceptable for all grad_tensors, then this argument is optional.
/// \param retain_graph If `false`, the graph used to compute the grad will be
///     freed. Note that in nearly all cases setting this option to `true` is
///     not needed and often can be worked around in a much more efficient
///     way. Defaults to the value of `create_graph`.
/// \param create_graph If `true`, the graph of the derivative will be
///     constructed, allowing higher order derivative products to be computed.
///     Defaults to `false`.
/// \param inputs Inputs w.r.t. which the gradient will be accumulated into
///     `at::Tensor::grad`. All other Tensors will be ignored. If not
///     provided, the gradient is accumulated into all the leaf Tensors that
///     were used to compute param `tensors`.
// When inputs are provided and a given input is not a leaf, the current
// implementation will call its grad_fn (even though it is not strictly
// needed to get these gradients). It is an implementation detail on which
// the user should not rely. See
// https://github.com/pytorch/pytorch/pull/60521#issuecomment-867061780 for
// more details.
TORCH_API void backward(
    const variable_list& tensors,
    const variable_list& grad_tensors = {},
    std::optional<bool> retain_graph = std::nullopt,
    bool create_graph = false,
    const variable_list& inputs = {});
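
// Example (illustrative sketch, not part of the original documentation;
// assumes the standard LibTorch tensor factories from <torch/torch.h> and a
// scalar `loss` computed from the leaf tensor `w`):
//
//   auto w = torch::randn({3, 3}, torch::requires_grad());
//   auto loss = (w * w).sum();
//   torch::autograd::backward({loss});
//   // d(loss)/dw is now accumulated into w.grad(); since backward()
//   // accumulates, zero the gradient (e.g. w.mutable_grad().zero_())
//   // before the next call if accumulation is not desired.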

/// Computes and returns the sum of gradients of outputs with respect to the
/// inputs.
///
/// ``grad_outputs`` should be a sequence of length matching ``outputs``,
/// containing the "vector" in the Jacobian-vector product, usually the
/// pre-computed gradients w.r.t. each of the outputs. If an output doesn't
/// require_grad, then the gradient can be ``torch::Tensor()``.
///
/// \param outputs outputs of the differentiated function.
/// \param inputs Inputs w.r.t. which the gradient will be returned (and not
///     accumulated into ``at::Tensor::grad``).
/// \param grad_outputs The "vector" in the Jacobian-vector product. Usually
///     gradients w.r.t. each output. `torch::Tensor()` values can be
///     specified for scalar Tensors or ones that don't require grad. If a
///     `torch::Tensor()` value would be acceptable for all grad_outputs, then
///     this argument is optional. Default: `{}`.
/// \param retain_graph If ``false``, the graph used to compute the grad will
///     be freed. Note that in nearly all cases setting this option to
///     ``true`` is not needed and often can be worked around in a much more
///     efficient way. Defaults to the value of ``create_graph``.
/// \param create_graph If ``true``, the graph of the derivative will be
///     constructed, allowing higher order derivative products to be computed.
///     Default: ``false``.
/// \param allow_unused If ``false``, specifying inputs that were not used
///     when computing outputs (and therefore their grad is always zero) is an
///     error. Defaults to ``false``.
TORCH_API variable_list grad(
    const variable_list& outputs,
    const variable_list& inputs,
    const variable_list& grad_outputs = {},
    std::optional<bool> retain_graph = std::nullopt,
    bool create_graph = false,
    bool allow_unused = false);

namespace forward_ad {

/// Creates a new dual level and returns its index. This level index should
/// then be used to call into the other functions below. This API supports
/// entering a new level before the previous one is exited. We call them
/// nested forward AD levels; they can be used to compute higher order
/// derivatives.
TORCH_API uint64_t enter_dual_level();

/// Exits the given level. This will clear up all the gradients from this
/// level, and all dual Tensors that had gradients for this level will become
/// regular Tensors again. This function can only be used to exit the
/// innermost nesting level, so exiting must happen in the reverse order of
/// the entering that was done with the function above.
TORCH_API void exit_dual_level(uint64_t level);

} // namespace forward_ad
} // namespace torch::autograd
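
// Illustrative usage sketch for grad() and the forward AD dual-level API
// (not part of the original documentation; assumes the standard LibTorch
// tensor factories and the declarations above):
//
//   // Reverse mode: return dy/dx instead of accumulating into x.grad().
//   auto x = torch::randn({4}, torch::requires_grad());
//   auto y = (x * x).sum();
//   auto dydx = torch::autograd::grad(/*outputs=*/{y}, /*inputs=*/{x});
//   // dydx[0] holds 2 * x.
//
//   // Forward mode: levels must be exited innermost-first, i.e. in the
//   // reverse order of the corresponding enter_dual_level() calls.
//   uint64_t outer = torch::autograd::forward_ad::enter_dual_level();
//   uint64_t inner = torch::autograd::forward_ad::enter_dual_level();
//   // ... build dual Tensors and compute with them at these levels ...
//   torch::autograd::forward_ad::exit_dual_level(inner);
//   torch::autograd::forward_ad::exit_dual_level(outer);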