/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/c/eager/gradient_checker.h"

#include <cmath>
#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>

#include "absl/types/span.h"
#include "tensorflow/c/eager/abstract_tensor_handle.h"
#include "tensorflow/c/experimental/ops/math_ops.h"
#include "tensorflow/c/tf_tensor.h"

namespace tensorflow {
namespace gradients {

using namespace std;

// ================== Helper functions =================

// Fills data with values [start,end) with given step size.
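// For example, Range(&v, 0, 3) writes {0, 1, 2} into v; the caller must size
// the vector to hold the generated values before calling.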
void Range(vector<int32_t>* data, int32_t start, int32_t end,
           int32_t step = 1) {
  int pos = 0;
  for (int32_t i = start; i < end; i += step) {
    (*data)[pos++] = i;
  }
}

// Fills out_dims with the dimensions of the given tensor.
void GetDims(const TF_Tensor* t, int64_t* out_dims) {
  int num_dims = TF_NumDims(t);
  for (int i = 0; i < num_dims; i++) {
    out_dims[i] = TF_Dim(t, i);
  }
}

// Runs the model as-is if the output is a scalar,
// else sums the output tensor before returning.
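// CalcNumericalGrad below reads a single scalar value back for each
// perturbation, which is why non-scalar outputs are reduced to a scalar here.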
Status RunAndMaybeSum(AbstractContext* ctx, Model forward,
                      absl::Span<AbstractTensorHandle* const> inputs,
                      absl::Span<AbstractTensorHandle*> outputs,
                      bool use_function) {
  AbstractTensorHandle* model_outputs[1];

  // Run the model.
  TF_RETURN_IF_ERROR(
      RunModel(forward, ctx, inputs, model_outputs, use_function));
  AbstractTensorHandlePtr model_out(model_outputs[0]);

  TF_Tensor* model_out_tensor;
  TF_RETURN_IF_ERROR(GetValue(model_out.get(), &model_out_tensor));
  int num_dims_out = TF_NumDims(model_out_tensor);
  TF_DeleteTensor(model_out_tensor);

  // If the output is a scalar, then return the scalar output.
  if (num_dims_out == 0) {
    outputs[0] = model_out.release();
    return OkStatus();
  }

  // Else, reduce sum the output to get a scalar.

  // Will sum all dimensions, so get a Tensor containing [0,...,num_dims_out-1].
  AbstractTensorHandlePtr sum_dims;
  {
    vector<int32_t> vals(num_dims_out);
    int64_t vals_shape[] = {num_dims_out};
    Range(&vals, 0, num_dims_out);
    AbstractTensorHandle* sum_dims_raw = nullptr;
    TF_RETURN_IF_ERROR(TestTensorHandleWithDims<int32_t, TF_INT32>(
        ctx, vals.data(), vals_shape, 1, &sum_dims_raw));
    sum_dims.reset(sum_dims_raw);
  }

  // Reduce sum the output on all dimensions.
  TF_RETURN_IF_ERROR(ops::Sum(ctx, model_out.get(), sum_dims.get(), &outputs[0],
                              /*keep_dims=*/false, "sum_output"));
  return OkStatus();
}
// ========================= End Helper Functions ==============================

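// Numerically approximates the gradient of `forward` with respect to
// inputs[input_index] using the central difference quotient
//
//   df/dtheta[i] ~= (f(theta + eps * e_i) - f(theta - eps * e_i)) / (2 * eps),
//
// where e_i is the i-th standard basis vector. Each element of the input is
// perturbed independently, so the model is evaluated 2 * num_elems times.
//
// A minimal calling sketch (AddModel and the surrounding setup are
// hypothetical, not defined in this file):
//
//   AbstractTensorHandle* numerical_grad = nullptr;
//   TF_RETURN_IF_ERROR(CalcNumericalGrad(ctx, AddModel, inputs,
//                                        /*input_index=*/0,
//                                        /*use_function=*/false,
//                                        &numerical_grad));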
Status CalcNumericalGrad(AbstractContext* ctx, Model forward,
                         absl::Span<AbstractTensorHandle* const> inputs,
                         int input_index, bool use_function,
                         AbstractTensorHandle** numerical_grad) {
  vector<AbstractTensorHandle*> theta_inputs(inputs.size());
  for (int i = 0; i < inputs.size(); ++i) {
    theta_inputs[i] = inputs[i];
  }

  AbstractTensorHandle* theta =
      theta_inputs[input_index];  // The parameter we are grad checking.

  // Convert from AbstractTensor to TF_Tensor.
  TF_Tensor* theta_tensor;
  TF_RETURN_IF_ERROR(GetValue(theta, &theta_tensor));

  // Get number of elements and fill data.
  int num_elems = TF_TensorElementCount(theta_tensor);
  vector<float> theta_data(num_elems);
  memcpy(theta_data.data(), TF_TensorData(theta_tensor),
         TF_TensorByteSize(theta_tensor));

  // Initialize space for the numerical gradient.
  vector<float> dtheta_approx(num_elems);

  // Get theta shape and store in theta_dims.
  int num_dims = TF_NumDims(theta_tensor);
  vector<int64_t> theta_dims(num_dims);
  GetDims(theta_tensor, theta_dims.data());

  // Initialize auxiliary data structures.
  vector<float> thetaPlus_data(num_elems);
  vector<float> thetaMinus_data(num_elems);
  AbstractTensorHandle* f_outputs[1];

  // Numerical grad check: perturb one element of theta at a time.
  for (int i = 0; i < num_elems; i++) {
    // Use a relative epsilon so the perturbation scales with |theta[i]|;
    // fall back to an absolute 1e-4 when theta[i] is zero.
    float epsilon = theta_data[i] == 0 ? 1e-4 : std::abs(theta_data[i] * 1e-4);
    AbstractTensorHandlePtr two_eps;
    {
      AbstractTensorHandle* two_eps_raw = nullptr;
      TF_RETURN_IF_ERROR(TestScalarTensorHandle<float, TF_FLOAT>(
          ctx, 2 * epsilon, &two_eps_raw));
      two_eps.reset(two_eps_raw);
    }

    // Initialize theta[i] + epsilon.
    memcpy(thetaPlus_data.data(), TF_TensorData(theta_tensor),
           TF_TensorByteSize(theta_tensor));
    thetaPlus_data[i] += epsilon;
    AbstractTensorHandlePtr thetaPlus;
    {
      AbstractTensorHandle* thetaPlus_raw = nullptr;
      TF_RETURN_IF_ERROR(TestTensorHandleWithDims<float, TF_FLOAT>(
          ctx, thetaPlus_data.data(), theta_dims.data(), num_dims,
          &thetaPlus_raw));
      thetaPlus.reset(thetaPlus_raw);
    }

    // Initialize theta[i] - epsilon.
    memcpy(thetaMinus_data.data(), TF_TensorData(theta_tensor),
           TF_TensorByteSize(theta_tensor));
    thetaMinus_data[i] -= epsilon;
    AbstractTensorHandlePtr thetaMinus;
    {
      AbstractTensorHandle* thetaMinus_raw = nullptr;
      TF_RETURN_IF_ERROR(TestTensorHandleWithDims<float, TF_FLOAT>(
          ctx, thetaMinus_data.data(), theta_dims.data(), num_dims,
          &thetaMinus_raw));
      thetaMinus.reset(thetaMinus_raw);
    }

    // Get f(theta + eps):
    theta_inputs[input_index] = thetaPlus.get();
    TF_RETURN_IF_ERROR(
        RunAndMaybeSum(ctx, forward, theta_inputs, f_outputs, use_function));
    AbstractTensorHandlePtr fPlus(f_outputs[0]);

    // Get f(theta - eps):
    theta_inputs[input_index] = thetaMinus.get();
    TF_RETURN_IF_ERROR(
        RunAndMaybeSum(ctx, forward, theta_inputs, f_outputs, use_function));
    AbstractTensorHandlePtr fMinus(f_outputs[0]);

    // Take the difference of both estimates: f(theta + eps) - f(theta - eps).
    TF_RETURN_IF_ERROR(
        ops::Sub(ctx, fPlus.get(), fMinus.get(), f_outputs, "sub_top"));
    AbstractTensorHandlePtr fDiff(f_outputs[0]);

    // Calculate using the difference quotient definition:
    // (f(theta + eps) - f(theta - eps)) / (2 * eps).
    TF_RETURN_IF_ERROR(
        ops::Div(ctx, fDiff.get(), two_eps.get(), f_outputs, "diff_quotient"));
    AbstractTensorHandlePtr diff_quotient(f_outputs[0]);

    // Read the scalar quotient back to the host and store it as element i of
    // the approximate gradient.
    TF_Tensor* grad_tensor;
    TF_RETURN_IF_ERROR(GetValue(diff_quotient.get(), &grad_tensor));
    float grad_data[1];
    memcpy(&grad_data[0], TF_TensorData(grad_tensor),
           TF_TensorByteSize(grad_tensor));
    TF_DeleteTensor(grad_tensor);
    dtheta_approx[i] = grad_data[0];
  }

  // Populate *numerical_grad with the data from dtheta_approx.
  TF_RETURN_IF_ERROR(TestTensorHandleWithDims<float, TF_FLOAT>(
      ctx, dtheta_approx.data(), theta_dims.data(), num_dims, numerical_grad));
  TF_DeleteTensor(theta_tensor);
  return OkStatus();
}

}  // namespace gradients
}  // namespace tensorflow