/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <functional>
#include <vector>

#include "absl/strings/match.h"
#include "tensorflow/cc/client/client_session.h"
#include "tensorflow/cc/framework/ops.h"
#include "tensorflow/cc/ops/array_ops.h"
#include "tensorflow/cc/ops/const_op.h"
#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/node_def_builder.h"
#include "tensorflow/core/framework/node_def_util.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_util.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/variant.h"
#include "tensorflow/core/framework/variant_encode_decode.h"
#include "tensorflow/core/framework/variant_op_registry.h"
#include "tensorflow/core/framework/variant_tensor_data.h"
#include "tensorflow/core/graph/node_builder.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/util/port.h"

namespace tensorflow {

namespace {

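// Counters recording how many times each Variant device-copy function below
// has run, so the tests can verify which copy paths were exercised.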
static int* GetCopyCPUToGPUCounter() {
  static int* counter = new int(0);
  return counter;
}

static int* GetCopyGPUToCPUCounter() {
  static int* counter = new int(0);
  return counter;
}

static int* GetCopyGPUToGPUCounter() {
  static int* counter = new int(0);
  return counter;
}

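// A minimal Variant payload: it wraps a single Tensor and implements the
// TypeName/Encode/Decode interface the Variant registry requires, plus a
// static copy function for each device-copy direction. Each copy function
// bumps its counter before delegating to the `copy` callback supplied by the
// runtime. Roughly, such a payload round-trips through a Variant like this
// (a sketch, not part of the test; `some_tensor` is a placeholder):
//
//   StoredTensorValue value{some_tensor};
//   Variant v = value;                          // copy into a Variant
//   VariantTensorData data;
//   v.get<StoredTensorValue>()->Encode(&data);  // serialize
//   StoredTensorValue decoded;
//   decoded.Decode(data);                       // restores `stored`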
struct StoredTensorValue {
  Tensor stored;
  string TypeName() const { return "StoredTensorValue"; }
  void Encode(VariantTensorData* data) const { data->tensors_ = {stored}; }
  bool Decode(const VariantTensorData& data) {
    CHECK_EQ(1, data.tensors_.size());
    stored = data.tensors_[0];
    return true;
  }
  static Status CopyCPUToGPU(
      const StoredTensorValue& from, StoredTensorValue* to,
      const std::function<Status(const Tensor&, Tensor*)>& copy) {
    ++*GetCopyCPUToGPUCounter();
    return copy(from.stored, &(to->stored));
  }
  static Status CopyGPUToCPU(
      const StoredTensorValue& from, StoredTensorValue* to,
      const std::function<Status(const Tensor&, Tensor*)>& copy) {
    ++*GetCopyGPUToCPUCounter();
    return copy(from.stored, &(to->stored));
  }
  static Status CopyGPUToGPU(
      const StoredTensorValue& from, StoredTensorValue* to,
      const std::function<Status(const Tensor&, Tensor*)>& copy) {
    ++*GetCopyGPUToGPUCounter();
    return copy(from.stored, &(to->stored));
  }
};

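// Register the decode function and all three device-copy directions for
// StoredTensorValue. These registrations are what let the runtime decode the
// payload from a proto and route device copies through the static functions
// above.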
REGISTER_UNARY_VARIANT_DECODE_FUNCTION(StoredTensorValue, "StoredTensorValue");

INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION(
    StoredTensorValue, VariantDeviceCopyDirection::HOST_TO_DEVICE,
    StoredTensorValue::CopyCPUToGPU);

INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION(
    StoredTensorValue, VariantDeviceCopyDirection::DEVICE_TO_HOST,
    StoredTensorValue::CopyGPUToCPU);

INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION(
    StoredTensorValue, VariantDeviceCopyDirection::DEVICE_TO_DEVICE,
    StoredTensorValue::CopyGPUToGPU);

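// A test-only op (with a CPU kernel below) that turns its input tensor into a
// vector of 10 StoredTensorValue Variants.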
REGISTER_OP("CreateTestVariant")
    .Input("input: T")
    .Attr("T: type")
    .Output("output: variant")
    .SetShapeFn(shape_inference::UnknownShape);

class CreateTestVariantOp : public OpKernel {
 public:
  explicit CreateTestVariantOp(OpKernelConstruction* c) : OpKernel(c) {}
  void Compute(OpKernelContext* c) override {
    // Take the scalar tensor fed as input, and emit a Tensor
    // containing 10 Variants (StoredTensorValues), each containing
    // the input tensor.
    const Tensor& stored_t = c->input(0);
    Tensor* out;
    OP_REQUIRES_OK(c, c->allocate_output(0, TensorShape({10}), &out));
    StoredTensorValue store{stored_t};
    auto t = out->flat<Variant>();
    for (int i = 0; i < 10; ++i) {
      t(i) = store;
    }
    CHECK_EQ("StoredTensorValue", t(0).TypeName());
  }
};

REGISTER_KERNEL_BUILDER(Name("CreateTestVariant").Device(DEVICE_CPU),
                        CreateTestVariantOp);

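// A hand-written graph-builder wrapper for CreateTestVariant, patterned after
// the wrappers the C++ op generator emits; presumably hand-rolled here
// because no generated wrapper exists for a test-local op.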
class CreateTestVariant {
 public:
  explicit CreateTestVariant(const ::tensorflow::Scope& scope,
                             const Input& value) {
    if (!scope.ok()) return;
    auto _value = ops::AsNodeOut(scope, value);
    if (!scope.ok()) return;
    ::tensorflow::Node* ret;
    const auto unique_name = scope.GetUniqueNameForOp("CreateTestVariant");
    auto builder = ::tensorflow::NodeBuilder(unique_name, "CreateTestVariant")
                       .Input(_value);
    scope.UpdateBuilder(&builder);
    scope.UpdateStatus(builder.Finalize(scope.graph(), &ret));
    if (!scope.ok()) return;
    scope.UpdateStatus(scope.DoShapeInference(ret));
    if (!scope.ok()) return;
    this->output_ = Output(ret, 0);
  }

  // Intentionally not marked as explicit.
  // NOLINTNEXTLINE google-explicit-constructor
  operator ::tensorflow::Output() const { return output_; }
  // Intentionally not marked as explicit.
  // NOLINTNEXTLINE google-explicit-constructor
  operator ::tensorflow::Input() const { return output_; }

  ::tensorflow::Node* node() const { return output_.node(); }

  ::tensorflow::Output output_;
};

}  // end namespace

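// Builds a DT_VARIANT scalar Const from a serialized StoredTensorValue and
// runs it on the CPU, verifying the Variant decodes back to the original
// int64 payload.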
TEST(VariantOpCopyTest, CreateConstOnCPU) {
  Scope root = Scope::NewRootScope().WithDevice("/cpu:0");

  // Create the input StoredTensorValue and serialize it.
  StoredTensorValue from;
  from.stored = Tensor(DT_INT64, TensorShape({}));
  from.stored.scalar<int64_t>()() = 0xdeadbeef;
  VariantTensorData data;
  data.set_type_name(from.TypeName());
  from.Encode(&data);

  TensorProto variant_proto;
  variant_proto.set_dtype(DT_VARIANT);
  TensorShape scalar_shape({});
  scalar_shape.AsProto(variant_proto.mutable_tensor_shape());
  data.ToProto(variant_proto.add_variant_val());

  Output create_const = ops::ConstFromProto(root, variant_proto);
  TF_ASSERT_OK(root.status());
  ClientSession session(root);
  std::vector<Tensor> outputs;
  TF_CHECK_OK(session.Run({create_const}, &outputs));
  EXPECT_EQ(1, outputs.size());
  EXPECT_EQ(DT_VARIANT, outputs[0].dtype());
  EXPECT_EQ(0, outputs[0].dims());
  const Variant& variant = outputs[0].scalar<Variant>()();
  EXPECT_EQ("StoredTensorValue", variant.TypeName());
  const StoredTensorValue* to = variant.get<StoredTensorValue>();
  EXPECT_EQ(to->stored.dtype(), DT_INT64);
  EXPECT_EQ(0xdeadbeef, to->stored.scalar<int64_t>()());
}

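// Same Const, but pinned to the GPU: materializing it there and fetching the
// result must route through both the host->device and device->host copy
// functions, which the counters verify.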
TEST(VariantOpCopyTest, CreateConstOnGPU) {
  if (!IsGoogleCudaEnabled()) return;

  Scope root = Scope::NewRootScope().WithDevice("/gpu:0");

  // Create the input StoredTensorValue and serialize it.
  StoredTensorValue from;
  from.stored = Tensor(DT_INT64, TensorShape({}));
  from.stored.scalar<int64_t>()() = 0xdeadbeef;
  VariantTensorData data;
  data.set_type_name(from.TypeName());
  from.Encode(&data);

  TensorProto variant_proto;
  variant_proto.set_dtype(DT_VARIANT);
  TensorShape scalar_shape({});
  scalar_shape.AsProto(variant_proto.mutable_tensor_shape());
  data.ToProto(variant_proto.add_variant_val());

  Output create_const = ops::ConstFromProto(root, variant_proto);
  TF_ASSERT_OK(root.status());
  ClientSession session(root);
  std::vector<Tensor> outputs;

  int copy_to_gpu_before = *GetCopyCPUToGPUCounter();
  int copy_to_cpu_before = *GetCopyGPUToCPUCounter();
  TF_CHECK_OK(session.Run({create_const}, &outputs));
  int copy_to_cpu_after = *GetCopyGPUToCPUCounter();
  int copy_to_gpu_after = *GetCopyCPUToGPUCounter();

  EXPECT_GT(copy_to_cpu_after - copy_to_cpu_before, 0);
  EXPECT_GT(copy_to_gpu_after - copy_to_gpu_before, 0);

  EXPECT_EQ(1, outputs.size());
  EXPECT_EQ(DT_VARIANT, outputs[0].dtype());
  EXPECT_EQ(0, outputs[0].dims());
  const Variant& variant = outputs[0].scalar<Variant>()();
  EXPECT_EQ("StoredTensorValue", variant.TypeName());
  const StoredTensorValue* to = variant.get<StoredTensorValue>();
  EXPECT_EQ(to->stored.dtype(), DT_INT64);
  EXPECT_EQ(0xdeadbeef, to->stored.scalar<int64_t>()());
}

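// A string tensor is not DMA-copyable, so placing the Variant Const on the
// GPU must fail with a descriptive error rather than crash.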
TEST(VariantOpCopyTest, CreateConstOnGPUFailsGracefully) {
  if (!IsGoogleCudaEnabled()) return;

  Scope root = Scope::NewRootScope().WithDevice("/gpu:0");

  // Create the input StoredTensorValue and serialize it.
  StoredTensorValue from;
  from.stored = Tensor(DT_STRING, TensorShape({}));
  from.stored.scalar<tstring>()() = "hi";
  VariantTensorData data;
  data.set_type_name(from.TypeName());
  from.Encode(&data);

  TensorProto variant_proto;
  variant_proto.set_dtype(DT_VARIANT);
  TensorShape scalar_shape({});
  scalar_shape.AsProto(variant_proto.mutable_tensor_shape());
  data.ToProto(variant_proto.add_variant_val());

  Output create_const = ops::ConstFromProto(root, variant_proto);
  TF_ASSERT_OK(root.status());
  ClientSession session(root);
  std::vector<Tensor> outputs;
  Status s = session.Run({create_const}, &outputs);
  EXPECT_TRUE(absl::StrContains(s.error_message(),
                                "GPU copy from non-DMA string tensor"))
      << s.ToString();
}

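// Runs the Variant-producing op and an Identity of it, both on the CPU, and
// verifies all ten Variant elements still hold the stored int32.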
TEST(VariantOpCopyTest, CreateCopyCPUToCPU) {
  Scope root = Scope::NewRootScope().WithDevice("/cpu:0");
  Tensor t_42(DT_INT32, TensorShape({}));
  t_42.flat<int32>()(0) = 42;
  Output create_op = CreateTestVariant(root, t_42);
  Output identity = ops::Identity(root, create_op);

  TF_ASSERT_OK(root.status());

  ClientSession session(root);
  std::vector<Tensor> outputs;
  TF_CHECK_OK(session.Run({create_op, identity}, &outputs));
  EXPECT_EQ(2, outputs.size());
  EXPECT_EQ(10, outputs[1].dim_size(0));
  auto output = outputs[1].flat<Variant>();
  for (int i = 0; i < 10; ++i) {
    const Variant& r1 = output(i);
    EXPECT_EQ("StoredTensorValue", r1.TypeName());
    const StoredTensorValue* v1 = r1.get<StoredTensorValue>();
    EXPECT_NE(v1, nullptr);
    EXPECT_EQ(42, v1->stored.scalar<int32>()());
  }
}

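// Same as above with a string payload; string tensors are fine to copy on
// the host.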
TEST(VariantOpCopyTest, CreateCopyCPUToCPUString) {
  Scope root = Scope::NewRootScope().WithDevice("/cpu:0");
  Tensor t_str(DT_STRING, TensorShape({}));
  t_str.scalar<tstring>()() = "hi";
  Output create_op = CreateTestVariant(root, t_str);
  Output identity = ops::Identity(root, create_op);

  TF_ASSERT_OK(root.status());

  ClientSession session(root);
  std::vector<Tensor> outputs;
  TF_CHECK_OK(session.Run({create_op, identity}, &outputs));
  EXPECT_EQ(2, outputs.size());
  EXPECT_EQ(10, outputs[1].dim_size(0));
  auto output = outputs[1].flat<Variant>();
  for (int i = 0; i < 10; ++i) {
    const Variant& r1 = output(i);
    EXPECT_EQ("StoredTensorValue", r1.TypeName());
    const StoredTensorValue* v1 = r1.get<StoredTensorValue>();
    EXPECT_NE(v1, nullptr);
    EXPECT_EQ("hi", v1->stored.scalar<tstring>()());
  }
}

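// Pins the Identity to the GPU so the Variant must cross host->device for
// the op and device->host when fetched; the counters verify both directions
// ran.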
TEST(VariantOpCopyTest, CreateCopyCPUToGPU) {
  if (!IsGoogleCudaEnabled()) return;

  Scope root = Scope::NewRootScope().WithDevice("/cpu:0");
  Scope with_gpu = root.WithDevice("/gpu:0");
  Tensor t_42(DT_INT32, TensorShape({}));
  t_42.scalar<int32>()() = 42;
  Output create_op = CreateTestVariant(root, t_42);
  Output identity = ops::Identity(with_gpu, create_op);

  TF_ASSERT_OK(root.status());

  ClientSession session(root);
  std::vector<Tensor> outputs;
  int copy_to_gpu_before = *GetCopyCPUToGPUCounter();
  int copy_to_cpu_before = *GetCopyGPUToCPUCounter();
  // Force the identity to run on GPU, and then the data to be copied
  // back to CPU for the final output.
  TF_CHECK_OK(session.Run({create_op, identity}, &outputs));
  int copy_to_cpu_after = *GetCopyGPUToCPUCounter();
  int copy_to_gpu_after = *GetCopyCPUToGPUCounter();

  EXPECT_GT(copy_to_cpu_after - copy_to_cpu_before, 0);
  EXPECT_GT(copy_to_gpu_after - copy_to_gpu_before, 0);

  EXPECT_EQ(2, outputs.size());
  EXPECT_EQ(10, outputs[1].dim_size(0));
  auto output = outputs[1].flat<Variant>();
  for (int i = 0; i < 10; ++i) {
    const Variant& r1 = output(i);
    EXPECT_EQ("StoredTensorValue", r1.TypeName());
    const StoredTensorValue* v1 = r1.get<StoredTensorValue>();
    EXPECT_NE(v1, nullptr);
    EXPECT_EQ(42, v1->stored.scalar<int32>()());
  }
}

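// As above with a string payload: the host->device Variant copy of a string
// tensor must fail safely with InvalidArgument.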
TEST(VariantOpCopyTest, CreateCopyCPUToGPUStringFailsSafely) {
  if (!IsGoogleCudaEnabled()) return;

  Scope root = Scope::NewRootScope().WithDevice("/cpu:0");
  Scope with_gpu = root.WithDevice("/gpu:0");
  Tensor t_str(DT_STRING, TensorShape({}));
  t_str.scalar<tstring>()() = "hi";
  Output create_op = CreateTestVariant(root, t_str);
  Output identity = ops::Identity(with_gpu, create_op);

  TF_ASSERT_OK(root.status());

  ClientSession session(root);
  std::vector<Tensor> outputs;
  Status err = session.Run({create_op, identity}, &outputs);
  EXPECT_TRUE(errors::IsInvalidArgument(err));
  EXPECT_TRUE(
      absl::StrContains(err.error_message(),
                        "During Variant Host->Device Copy: non-DMA-copy "
                        "attempted of tensor type: string"))
      << err.error_message();
}

// TODO(ebrevdo): Identify a way to create two virtual GPUs within a
// single session, so that we can test the Device <-> Device copy
// branch.

}  // end namespace tensorflow