/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/c/eager/gradient_checker.h"

#include <cmath>
#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>

#include "absl/types/span.h"
#include "tensorflow/c/eager/abstract_tensor_handle.h"
#include "tensorflow/c/experimental/ops/math_ops.h"
#include "tensorflow/c/tf_tensor.h"
namespace tensorflow {
namespace gradients {

using std::vector;

// ================== Helper functions =================

// Fills data with values [start, end) with the given step size.
void Range(vector<int32_t>* data, int32_t start, int32_t end,
           int32_t step = 1) {
  for (int32_t i = start; i < end; i += step) {
    (*data)[(i - start) / step] = i;
  }
}
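// For example, Range(&v, 0, 3) fills v with {0, 1, 2}; RunAndMaybeSum below
// uses this to build the axis list for a full reduction.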

// Fills out_dims with the dimensions of the given tensor.
void GetDims(const TF_Tensor* t, int64_t* out_dims) {
  int num_dims = TF_NumDims(t);
  for (int i = 0; i < num_dims; i++) {
    out_dims[i] = TF_Dim(t, i);
  }
}

// Runs the forward model as-is if its output is a scalar; otherwise
// reduce-sums the output tensor first, so callers always receive a scalar.
Status RunAndMaybeSum(AbstractContext* ctx, Model forward,
                      absl::Span<AbstractTensorHandle* const> inputs,
                      absl::Span<AbstractTensorHandle*> outputs,
                      bool use_function) {
  AbstractTensorHandle* model_outputs[1];

  // Run the model.
  TF_RETURN_IF_ERROR(
      RunModel(forward, ctx, inputs, model_outputs, use_function));
  AbstractTensorHandlePtr model_out(model_outputs[0]);

  TF_Tensor* model_out_tensor;
  TF_RETURN_IF_ERROR(GetValue(model_out.get(), &model_out_tensor));
  int num_dims_out = TF_NumDims(model_out_tensor);
  TF_DeleteTensor(model_out_tensor);

  // If the output is a scalar, return it directly.
  if (num_dims_out == 0) {
    outputs[0] = model_out.release();
    return OkStatus();
  }

  // Otherwise, reduce-sum the output to get a scalar.

  // All dimensions are summed, so build a tensor containing
  // [0, ..., num_dims_out - 1].
  AbstractTensorHandlePtr sum_dims;
  {
    vector<int32_t> vals(num_dims_out);
    int64_t vals_shape[] = {num_dims_out};
    Range(&vals, 0, num_dims_out);
    AbstractTensorHandle* sum_dims_raw = nullptr;
    TF_RETURN_IF_ERROR(TestTensorHandleWithDims<int32_t, TF_INT32>(
        ctx, vals.data(), vals_shape, 1, &sum_dims_raw));
    sum_dims.reset(sum_dims_raw);
  }

  // Reduce-sum the output over all dimensions.
  TF_RETURN_IF_ERROR(ops::Sum(ctx, model_out.get(), sum_dims.get(),
                              &outputs[0],
                              /*keep_dims=*/false, "sum_output"));
  return OkStatus();
}
// ======================== End Helper Functions ===============================

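// Numerically approximates the gradient of `forward` with respect to
// `inputs[input_index]` using a per-element central difference on the
// (scalar-reduced) model output.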
Status CalcNumericalGrad(AbstractContext* ctx, Model forward,
                         absl::Span<AbstractTensorHandle* const> inputs,
                         int input_index, bool use_function,
                         AbstractTensorHandle** numerical_grad) {
  vector<AbstractTensorHandle*> theta_inputs(inputs.begin(), inputs.end());

  // The parameter whose gradient we are checking.
  AbstractTensorHandle* theta = theta_inputs[input_index];

  // Convert from AbstractTensorHandle to TF_Tensor.
  TF_Tensor* theta_tensor;
  TF_RETURN_IF_ERROR(GetValue(theta, &theta_tensor));

  // Get the number of elements and copy out the data.
  int num_elems = TF_TensorElementCount(theta_tensor);
  vector<float> theta_data(num_elems);
  memcpy(theta_data.data(), TF_TensorData(theta_tensor),
         TF_TensorByteSize(theta_tensor));

  // Initialize space for the numerical gradient.
  vector<float> dtheta_approx(num_elems);

  // Get theta's shape and store it in theta_dims.
  int num_dims = TF_NumDims(theta_tensor);
  vector<int64_t> theta_dims(num_dims);
  GetDims(theta_tensor, theta_dims.data());

  // Initialize auxiliary data structures.
  vector<float> thetaPlus_data(num_elems);
  vector<float> thetaMinus_data(num_elems);
  AbstractTensorHandle* f_outputs[1];

  // Numerical gradient check:
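  // For each element theta[i], perturb only that element by +/- eps and
  // estimate df/dtheta[i] as (f(theta + eps) - f(theta - eps)) / (2 * eps),
  // where f is the forward model reduced to a scalar by RunAndMaybeSum.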
  for (int i = 0; i < num_elems; i++) {
    // Use an epsilon relative to the magnitude of theta[i], falling back to
    // an absolute 1e-4 when theta[i] is zero.
    float epsilon = theta_data[i] == 0 ? 1e-4 : std::abs(theta_data[i] * 1e-4);
    AbstractTensorHandlePtr two_eps;
    {
      AbstractTensorHandle* two_eps_raw = nullptr;
      TF_RETURN_IF_ERROR(TestScalarTensorHandle<float, TF_FLOAT>(
          ctx, 2 * epsilon, &two_eps_raw));
      two_eps.reset(two_eps_raw);
    }

    // Initialize theta[i] + epsilon.
    memcpy(thetaPlus_data.data(), TF_TensorData(theta_tensor),
           TF_TensorByteSize(theta_tensor));
    thetaPlus_data[i] += epsilon;
    AbstractTensorHandlePtr thetaPlus;
    {
      AbstractTensorHandle* thetaPlus_raw = nullptr;
      TF_RETURN_IF_ERROR(TestTensorHandleWithDims<float, TF_FLOAT>(
          ctx, thetaPlus_data.data(), theta_dims.data(), num_dims,
          &thetaPlus_raw));
      thetaPlus.reset(thetaPlus_raw);
    }

    // Initialize theta[i] - epsilon.
    memcpy(thetaMinus_data.data(), TF_TensorData(theta_tensor),
           TF_TensorByteSize(theta_tensor));
    thetaMinus_data[i] -= epsilon;
    AbstractTensorHandlePtr thetaMinus;
    {
      AbstractTensorHandle* thetaMinus_raw = nullptr;
      TF_RETURN_IF_ERROR(TestTensorHandleWithDims<float, TF_FLOAT>(
          ctx, thetaMinus_data.data(), theta_dims.data(), num_dims,
          &thetaMinus_raw));
      thetaMinus.reset(thetaMinus_raw);
    }

    // Get f(theta + eps):
    theta_inputs[input_index] = thetaPlus.get();
    TF_RETURN_IF_ERROR(
        RunAndMaybeSum(ctx, forward, theta_inputs, f_outputs, use_function));
    AbstractTensorHandlePtr fPlus(f_outputs[0]);

    // Get f(theta - eps):
    theta_inputs[input_index] = thetaMinus.get();
    TF_RETURN_IF_ERROR(
        RunAndMaybeSum(ctx, forward, theta_inputs, f_outputs, use_function));
    AbstractTensorHandlePtr fMinus(f_outputs[0]);

    // Take the difference of both estimates: f(theta + eps) - f(theta - eps).
    TF_RETURN_IF_ERROR(
        ops::Sub(ctx, fPlus.get(), fMinus.get(), f_outputs, "sub_top"));
    AbstractTensorHandlePtr fDiff(f_outputs[0]);

    // Calculate the difference quotient:
    // (f(theta + eps) - f(theta - eps)) / (2 * eps).
    TF_RETURN_IF_ERROR(
        ops::Div(ctx, fDiff.get(), two_eps.get(), f_outputs, "diff_quotient"));
    AbstractTensorHandlePtr diff_quotient(f_outputs[0]);

    // Copy the scalar result into dtheta_approx[i].
    TF_Tensor* grad_tensor;
    TF_RETURN_IF_ERROR(GetValue(diff_quotient.get(), &grad_tensor));
    float grad_data[1];
    memcpy(&grad_data[0], TF_TensorData(grad_tensor),
           TF_TensorByteSize(grad_tensor));
    TF_DeleteTensor(grad_tensor);
    dtheta_approx[i] = grad_data[0];
  }

  // Populate *numerical_grad with the data from dtheta_approx.
  TF_RETURN_IF_ERROR(TestTensorHandleWithDims<float, TF_FLOAT>(
      ctx, dtheta_approx.data(), theta_dims.data(), num_dims, numerical_grad));
  TF_DeleteTensor(theta_tensor);
  return OkStatus();
}
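
// Example usage (illustrative sketch only; `MatMulModel`, `ctx`, and the
// input handles A and B are assumed to come from the caller's test harness):
//
//   AbstractTensorHandle* numerical_grad = nullptr;
//   TF_ASSERT_OK(CalcNumericalGrad(ctx, MatMulModel, {A, B},
//                                  /*input_index=*/0,
//                                  /*use_function=*/false, &numerical_grad));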

}  // namespace gradients
}  // namespace tensorflow