#pragma once

#include <torch/csrc/autograd/variable.h>

namespace torch::autograd {

/// Computes the sum of gradients of given tensors with respect to graph leaves.
///
/// The graph is differentiated using the chain rule. If any of ``tensors``
/// are non-scalar (i.e. their data has more than one element) and require
/// gradient, then the Jacobian-vector product is computed; in that case the
/// function additionally requires specifying `grad_tensors`. It should be a
/// sequence of matching length that contains the "vector" in the
/// Jacobian-vector product, usually the gradient of the differentiated
/// function w.r.t. the corresponding tensors (`torch::Tensor()` is an
/// acceptable value for all tensors that don't need gradient tensors).
///
/// This function accumulates gradients in the leaves - you might need to zero
/// them before calling it.
///
/// \param tensors Tensors of which the derivative will be computed.
/// \param grad_tensors The "vector" in the Jacobian-vector product, usually
///     gradients w.r.t. each element of the corresponding tensors.
///     `torch::Tensor()` values can be specified for scalar Tensors or ones
///     that don't require grad. If a `torch::Tensor()` value would be
///     acceptable for all grad_tensors, then this argument is optional.
/// \param retain_graph If `false`, the graph used to compute the grad will be
///     freed. Note that in nearly all cases setting this option to `true` is
///     not needed and often can be worked around in a much more efficient way.
///     Defaults to the value of `create_graph`.
/// \param create_graph If `true`, the graph of the derivative will be
///     constructed, allowing higher order derivative products to be computed.
///     Defaults to `false`.
/// \param inputs Inputs w.r.t. which the gradient will be accumulated into
///     `at::Tensor::grad`. All other Tensors will be ignored. If not provided,
///     the gradient is accumulated into all the leaf Tensors that were used to
///     compute the param `tensors`.
//      When inputs are provided and a given input is not a leaf,
//      the current implementation will call its grad_fn (even though it is not
//      strictly needed to get these gradients). It is an implementation detail
//      on which the user should not rely. See
//      https://github.com/pytorch/pytorch/pull/60521#issuecomment-867061780 for
//      more details.
TORCH_API void backward(
    const variable_list& tensors,
    const variable_list& grad_tensors = {},
    std::optional<bool> retain_graph = std::nullopt,
    bool create_graph = false,
    const variable_list& inputs = {});
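
// Illustrative usage sketch (not part of this header's API): it assumes the
// caller has included <torch/torch.h>, which provides the tensor factory
// functions used below.
//
//   auto x = torch::ones({2, 2}, torch::requires_grad());
//   auto loss = (x * x).sum();
//   // `loss` is a scalar, so no grad_tensors "vector" is needed; d(loss)/dx
//   // is accumulated into x.grad().
//   torch::autograd::backward({loss});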

/// Computes and returns the sum of gradients of outputs with respect to the
/// inputs.
///
/// ``grad_outputs`` should be a sequence of length matching ``outputs``,
/// containing the "vector" in the Jacobian-vector product, usually the
/// pre-computed gradients w.r.t. each of the outputs. If an output doesn't
/// require_grad, then the gradient can be ``torch::Tensor()``.
///
/// \param outputs Outputs of the differentiated function.
/// \param inputs Inputs w.r.t. which the gradient will be
///     returned (and not accumulated into ``at::Tensor::grad``).
/// \param grad_outputs The "vector" in the Jacobian-vector product.
///     Usually gradients w.r.t. each output. `torch::Tensor()` values can be
///     specified for scalar Tensors or ones that don't require grad. If a
///     `torch::Tensor()` value would be acceptable for all grad_outputs, then
///     this argument is optional. Default: `{}`.
/// \param retain_graph If ``false``, the graph used to compute the grad
///     will be freed. Note that in nearly all cases setting this option to
///     ``true`` is not needed and often can be worked around in a much more
///     efficient way. Defaults to the value of ``create_graph``.
/// \param create_graph If ``true``, the graph of the derivative will
///     be constructed, allowing higher order derivative products to be
///     computed. Default: ``false``.
/// \param allow_unused If ``false``, specifying inputs that were not
///     used when computing outputs (and therefore their grad is always zero)
///     is an error. Defaults to ``false``.
TORCH_API variable_list grad(
    const variable_list& outputs,
    const variable_list& inputs,
    const variable_list& grad_outputs = {},
    std::optional<bool> retain_graph = std::nullopt,
    bool create_graph = false,
    bool allow_unused = false);
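
// Illustrative usage sketch (not part of this header's API): the factory
// functions below come from the broader torch C++ API, not from this header.
//
//   auto x = torch::ones({3}, torch::requires_grad());
//   auto y = x * 2;
//   // `y` is non-scalar, so a grad_outputs "vector" is required; dy/dx is
//   // returned rather than accumulated into x.grad().
//   auto dydx = torch::autograd::grad({y}, {x}, {torch::ones_like(y)});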

namespace forward_ad {

/// Creates a new dual level and returns its index. This level index should then
/// be used to call into the other functions below. This API supports entering a
/// new level before the previous one is exited. We call them nested forward AD
/// levels. These can be used to compute higher order derivatives.
TORCH_API uint64_t enter_dual_level();

/// Exits the given level. This will clear up all the gradients from this
/// level, and all dual Tensors that had gradients for this level will become
/// regular Tensors again. This function can only be used to exit the innermost
/// nesting level, so exits must happen in the reverse order of the
/// corresponding calls to `enter_dual_level` above.
TORCH_API void exit_dual_level(uint64_t level);
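
// Illustrative nesting sketch (not part of this header's API): dual Tensors
// would be created between the enter/exit calls via the forward-mode AD
// machinery, which lives outside this header.
//
//   uint64_t outer = torch::autograd::forward_ad::enter_dual_level();
//   uint64_t inner = torch::autograd::forward_ad::enter_dual_level();
//   // ... compute with dual Tensors at these levels ...
//   torch::autograd::forward_ad::exit_dual_level(inner); // innermost first
//   torch::autograd::forward_ad::exit_dual_level(outer);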

} // namespace forward_ad
} // namespace torch::autograd