1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/core/common_runtime/placer.h"
17
18 #include <memory>
19 #include <string>
20 #include <unordered_set>
21 #include <utility>
22 #include <vector>
23
24 #include "tensorflow/core/common_runtime/device.h"
25 #include "tensorflow/core/common_runtime/device_factory.h"
26 #include "tensorflow/core/common_runtime/device_set.h"
27 #include "tensorflow/core/common_runtime/graph_constructor.h"
28 #include "tensorflow/core/common_runtime/graph_def_builder_util.h"
29 #include "tensorflow/core/common_runtime/optimization_registry.h"
30 #include "tensorflow/core/framework/device_attributes.pb.h"
31 #include "tensorflow/core/framework/function.h"
32 #include "tensorflow/core/framework/function_testlib.h"
33 #include "tensorflow/core/framework/kernel_def_builder.h"
34 #include "tensorflow/core/framework/op.h"
35 #include "tensorflow/core/framework/op_def_builder.h"
36 #include "tensorflow/core/framework/op_kernel.h"
37 #include "tensorflow/core/framework/types.pb.h"
38 #include "tensorflow/core/graph/graph.h"
39 #include "tensorflow/core/graph/graph_def_builder.h"
40 #include "tensorflow/core/lib/core/errors.h"
41 #include "tensorflow/core/lib/core/status_test_util.h"
42 #include "tensorflow/core/lib/strings/str_util.h"
43 #include "tensorflow/core/lib/strings/strcat.h"
44 #include "tensorflow/core/platform/test.h"
45 #include "tensorflow/core/protobuf/config.pb.h"
46 #include "tensorflow/core/protobuf/error_codes.pb.h"
47 #include "tensorflow/core/protobuf/rewriter_config.pb.h"
48
49 namespace tensorflow {
50
using ::tensorflow::test::function::GDef;
using ::tensorflow::test::function::NDef;
using FDH = ::tensorflow::FunctionDefHelper;

// Short device names (no job/replica/task) for the fake device types.
constexpr char kCPU[] = "/device:FakeCPU:0";
constexpr char kGPU[] = "/device:FakeGPU:0";

// Fully-qualified names matching the devices built by the PlacerTest fixture.
constexpr char kFullCPU[] = "/job:a/replica:0/task:0/device:FakeCPU:0";
constexpr char kFullGPU[] = "/job:a/replica:0/task:0/device:FakeGPU:0";
60
61 namespace {
62
63 ////////////////////////////////////////////////////////////////////////////////
64 //
65 // Op, kernel, and device registrations to set up the environment.
66 //
67 // The Placer uses information about the op (input types),
68 // kernel (device constraints), and available devices to make
69 // placement decisions. To avoid depending on the full runtime, we
70 // define dummy implementations of these, and register them with the
71 // runtime.
72 //
73 ////////////////////////////////////////////////////////////////////////////////
74
75 // A dummy OpKernel that is used to register ops on different devices.
class DummyOp : public OpKernel {
 public:
  // Reads no attributes; exists only so a kernel can be registered.
  explicit DummyOp(OpKernelConstruction* context) : OpKernel(context) {}
  // No-op: these tests exercise placement, never execution.
  void Compute(OpKernelContext* context) override {}
};
81
// A fake device that has specific device attributes, used to simulate
// the presence of a CPU or a GPU (without depending on that part of
// the runtime).
85 class FakeDevice : public Device {
86 private:
FakeDevice(const DeviceAttributes & device_attributes)87 explicit FakeDevice(const DeviceAttributes& device_attributes)
88 : Device(nullptr, device_attributes) {}
89
90 public:
Sync()91 Status Sync() override { return errors::Unimplemented("FakeDevice::Sync()"); }
92
GetAllocator(AllocatorAttributes attr)93 Allocator* GetAllocator(AllocatorAttributes attr) override { return nullptr; }
94
MakeDevice(const string & name,const string & device_type)95 static std::unique_ptr<Device> MakeDevice(const string& name,
96 const string& device_type) {
97 DeviceAttributes device_attributes;
98 device_attributes.set_name(name);
99 device_attributes.set_device_type(device_type);
100 return std::unique_ptr<Device>(new FakeDevice(device_attributes));
101 }
102
MakeCPU(const string & name)103 static std::unique_ptr<Device> MakeCPU(const string& name) {
104 return MakeDevice(name, "FakeCPU");
105 }
106
MakeGPU(const string & name)107 static std::unique_ptr<Device> MakeGPU(const string& name) {
108 return MakeDevice(name, "FakeGPU");
109 }
110 };
111
// A factory that creates no devices: the PlacerTest fixture builds its
// FakeDevice instances directly. Registering it makes the FakeCPU/FakeGPU
// device types known to the registry (see the registrations below).
class DummyFactory : public DeviceFactory {
 public:
  // Reports no physical devices.
  Status ListPhysicalDevices(std::vector<string>* devices) override {
    return OkStatus();
  }
  // Intentionally creates nothing; devices come from the test fixture.
  Status CreateDevices(const SessionOptions& options, const string& name_prefix,
                       std::vector<std::unique_ptr<Device>>* devices) override {
    return OkStatus();
  }
};
122
// Device order now depends on the registration of devices, not a fixed
// value in device_set.cc. To avoid the need to link in the real CPU and GPU
// devices into this test, we create fake devices and registrations that
// can stand-in for the real devices for the purposes of testing placement
// and ordering.
REGISTER_LOCAL_DEVICE_FACTORY("FakeCPU", DummyFactory);
// The extra argument (51) is presumably the factory priority, ranking
// FakeGPU above FakeCPU — consistent with the tests below that expect
// unconstrained ops to land on FakeGPU. TODO(review): confirm against
// REGISTER_LOCAL_DEVICE_FACTORY's definition.
REGISTER_LOCAL_DEVICE_FACTORY("FakeGPU", DummyFactory, 51);
130
// Register the following ops so they can be added to a Graph, and
// kernels so that they can be placed on particular device types.
//
// Naming convention used below: a "CPU"/"GPU" suffix means the op has a
// kernel only for that fake device type; plain "Test..." ops have kernels
// for both types.
REGISTER_OP("TestVariable").Output("o: Ref(float)");
REGISTER_KERNEL_BUILDER(Name("TestVariable").Device("FakeCPU"), DummyOp);
REGISTER_KERNEL_BUILDER(Name("TestVariable").Device("FakeGPU"), DummyOp);

REGISTER_OP("VariableCPU").Output("o: Ref(float)");
REGISTER_KERNEL_BUILDER(Name("VariableCPU").Device("FakeCPU"), DummyOp);

REGISTER_OP("VariableGPU").Output("o: Ref(float)");
REGISTER_KERNEL_BUILDER(Name("VariableGPU").Device("FakeGPU"), DummyOp);

// No kernel registered at all — presumably used to exercise "no registered
// kernel" placement failures; verify against the tests that reference it.
REGISTER_OP("VariableNoKernels").Output("o: Ref(float)");

REGISTER_OP("TestAdd").Input("a: float").Input("b: float").Output("o: float");
REGISTER_KERNEL_BUILDER(Name("TestAdd").Device("FakeCPU"), DummyOp);
REGISTER_KERNEL_BUILDER(Name("TestAdd").Device("FakeGPU"), DummyOp);

REGISTER_OP("TestRelu").Input("i: float").Output("o: float");
REGISTER_KERNEL_BUILDER(Name("TestRelu").Device("FakeCPU"), DummyOp);
REGISTER_KERNEL_BUILDER(Name("TestRelu").Device("FakeGPU"), DummyOp);

REGISTER_OP("ReluCPU").Input("i: float").Output("o: float");
REGISTER_KERNEL_BUILDER(Name("ReluCPU").Device("FakeCPU"), DummyOp);

REGISTER_OP("ReluGPU").Input("i: float").Output("o: float");
REGISTER_KERNEL_BUILDER(Name("ReluGPU").Device("FakeGPU"), DummyOp);

// Assign-style ops take a Ref input, creating the reference connections
// the Placer must colocate with the producing variable.
REGISTER_OP("TestAssign").Input("i: Ref(float)").Input("v: float");
REGISTER_KERNEL_BUILDER(Name("TestAssign").Device("FakeCPU"), DummyOp);
REGISTER_KERNEL_BUILDER(Name("TestAssign").Device("FakeGPU"), DummyOp);

REGISTER_OP("AssignCPU").Input("i: Ref(float)").Input("v: float");
REGISTER_KERNEL_BUILDER(Name("AssignCPU").Device("FakeCPU"), DummyOp);

REGISTER_OP("AssignGPU").Input("i: Ref(float)").Input("v: float");
REGISTER_KERNEL_BUILDER(Name("AssignGPU").Device("FakeGPU"), DummyOp);

REGISTER_OP("TestInput").Output("a: float").Output("b: float");
REGISTER_KERNEL_BUILDER(Name("TestInput").Device("FakeCPU"), DummyOp);

// Op producing an output that can be placed on CPU or GPU.
REGISTER_OP("TestCPUGPUOutput").Output("a: float");
REGISTER_KERNEL_BUILDER(Name("TestCPUGPUOutput").Device("FakeCPU"), DummyOp);
REGISTER_KERNEL_BUILDER(Name("TestCPUGPUOutput").Device("FakeGPU"), DummyOp);

REGISTER_OP("TestGPUOutput").Output("a: float");
REGISTER_KERNEL_BUILDER(Name("TestGPUOutput").Device("FakeGPU"), DummyOp);

REGISTER_OP("TestDevice").Output("a: float").Output("b: float");
REGISTER_KERNEL_BUILDER(Name("TestDevice").Device("FakeGPU"), DummyOp);

REGISTER_OP("TestDeviceEnforce").Input("a: Ref(float)").Output("b: float");
REGISTER_KERNEL_BUILDER(Name("TestDeviceEnforce").Device("FakeCPU"), DummyOp);
REGISTER_KERNEL_BUILDER(Name("TestDeviceEnforce").Device("FakeGPU"), DummyOp);

// "Shape" has no REGISTER_OP here; only fake-device kernels are added for
// the op (registered elsewhere).
REGISTER_KERNEL_BUILDER(Name("Shape").Device("FakeCPU"), DummyOp);
REGISTER_KERNEL_BUILDER(Name("Shape").Device("FakeGPU"), DummyOp);

// Op that has kernels with device priorities specified (CPU outranks GPU).
REGISTER_OP("TestDatasetOp").Input("a: float").Output("b: float");
REGISTER_KERNEL_BUILDER(Name("TestDatasetOp").Device("FakeCPU").Priority(2),
                        DummyOp);
REGISTER_KERNEL_BUILDER(Name("TestDatasetOp").Device("FakeGPU").Priority(1),
                        DummyOp);

// Op that has kernels with XLA device priority higher than FakeCPU.
REGISTER_OP("TestXlaOp").Input("a: float").Output("b: float");
REGISTER_KERNEL_BUILDER(Name("TestXlaOp").Device("XLA_CPU").Priority(2),
                        DummyOp);
REGISTER_KERNEL_BUILDER(Name("TestXlaOp").Device("FakeCPU").Priority(1),
                        DummyOp);

// Op with no-copy type definition.
REGISTER_OP("TestUncopiableTypeGeneratorCPU")
    .Output("d: variant")
    .SetTypeConstructor(full_type::UnaryGeneric(TFT_DATASET));
REGISTER_KERNEL_BUILDER(
    Name("TestUncopiableTypeGeneratorCPU").Device("FakeCPU"), DummyOp);

// Op consuming a typed input.
REGISTER_OP("TestTypedConsumer").Input("i: variant");
REGISTER_KERNEL_BUILDER(Name("TestTypedConsumer").Device("FakeCPU"), DummyOp);
REGISTER_KERNEL_BUILDER(Name("TestTypedConsumer").Device("FakeGPU"), DummyOp);
215
216 ////////////////////////////////////////////////////////////////////////////////
217 //
218 // A PlacerTest method has three phases:
219 //
220 // 1. Build a TensorFlow graph, with no (or partial) device assignments.
221 // 2. Attempt to compute a placement using the Placer.
222 // 3. EITHER: test that the constraints implied by the graph are respected;
223 // or that an appropriate error was reported.
224 //
225 ////////////////////////////////////////////////////////////////////////////////
// Test harness: builds a DeviceSet of fake devices, converts graphs from
// builders or GraphDefs, runs the Placer, and offers name-based node lookup.
class PlacerTest : public ::testing::Test {
 protected:
  // Default fixture: ten fake CPUs and ten fake GPUs (plus the singleton
  // XLA_CPU and COMPOSITE devices added by the delegated constructor).
  PlacerTest() : PlacerTest(10) {}

  explicit PlacerTest(int num_devices) {
    // Build a set of num_devices GPU, num_devices CPU devices, one XLA_CPU
    // device, and one COMPOSITE device.
    // NOTE: this->local_devices_ owns the device objects;
    // this->devices_ contains borrowed pointers to the device
    // objects.
    for (int i = 0; i < num_devices; ++i) {
      local_devices_.emplace_back(FakeDevice::MakeCPU(
          strings::StrCat("/job:a/replica:0/task:0/device:FakeCPU:", i)));
      devices_.AddDevice(local_devices_.back().get());
      // Insert the GPUs in reverse order.
      local_devices_.emplace_back(FakeDevice::MakeGPU(strings::StrCat(
          "/job:a/replica:0/task:0/device:FakeGPU:", num_devices - 1 - i)));
      devices_.AddDevice(local_devices_.back().get());
    }
    local_devices_.emplace_back(FakeDevice::MakeDevice(
        "/job:a/replica:0/task:0/device:XLA_CPU:0", "XLA_CPU"));
    devices_.AddDevice(local_devices_.back().get());
    local_devices_.emplace_back(FakeDevice::MakeDevice(
        "/job:a/replica:0/task:0/device:COMPOSITE:0", "COMPOSITE"));
    devices_.AddDevice(local_devices_.back().get());
  }

  // Builds the given graph, and (if successful) indexes the node
  // names for use in placement, and later lookup.
  Status BuildGraph(const GraphDefBuilder& builder, Graph* out_graph) {
    TF_RETURN_IF_ERROR(GraphDefBuilderToGraph(builder, out_graph));
    RebuildNodeNameMap(*out_graph);
    return OkStatus();
  }

  // As above, but starting from a GraphDef protocol buffer.
  Status BuildGraph(const GraphDef& graph_def, Graph* out_graph) {
    GraphConstructorOptions opts;
    TF_RETURN_IF_ERROR(ConvertGraphDefToGraph(opts, graph_def, out_graph));
    RebuildNodeNameMap(*out_graph);
    return OkStatus();
  }

  // Invokes the Placer on "graph". If no DeviceSet is specified, the
  // placement will use the default DeviceSet (of 10 CPU and 10 GPU devices).
  //
  // REQUIRES: "*graph" was produced by the most recent call to BuildGraph.
  Status Place(Graph* graph, DeviceSet* devices, Device* default_local_device,
               bool allow_soft_placement, bool log_device_placement) {
    Placer placer(graph, "", &graph->flib_def(), devices, default_local_device,
                  allow_soft_placement, log_device_placement);
    return placer.Run();
  }

  // Runs the registered PRE_PLACEMENT optimization passes over "graph",
  // then places it. Real optimizations are switched off first.
  Status CallOptPassesAndPlace(Graph* graph, DeviceSet* devices,
                               bool allow_soft_placement,
                               bool log_device_placement) {
    // Disable all real optimizations (i.e. Grappler and GraphOptimizer)
    // to make sure functions are not inlined and not constant folded
    SessionOptions session_options;
    GraphOptions* graph_opts = session_options.config.mutable_graph_options();
    OptimizerOptions* optimizer_opts = graph_opts->mutable_optimizer_options();
    optimizer_opts->set_opt_level(OptimizerOptions::L0);
    optimizer_opts->set_global_jit_level(OptimizerOptions::OFF);
    RewriterConfig* rewriter_config = graph_opts->mutable_rewrite_options();
    rewriter_config->set_disable_meta_optimizer(true);

    // Placing nested functions requires going through some PRE_PLACEMENT
    // passes. Currently, just the IsolateDeepOpsPass.
    GraphOptimizationPassOptions optimization_options;
    // The pass API takes a std::unique_ptr, but the caller retains
    // ownership of "graph", so the pointer is released on every exit path.
    std::unique_ptr<Graph> graph_ptr(graph);
    optimization_options.graph = &graph_ptr;
    FunctionLibraryDefinition flib_def(graph->flib_def());
    optimization_options.flib_def = &flib_def;
    // NOTE(review): the passes see the fixture's devices_, not the
    // "devices" argument that the Placer below uses. All current callers
    // pass &devices_, so the two agree — confirm before calling with a
    // different DeviceSet.
    optimization_options.device_set = &devices_;
    optimization_options.session_options = &session_options;
    Status s = OptimizationPassRegistry::Global()->RunGrouping(
        OptimizationPassRegistry::PRE_PLACEMENT, optimization_options);
    if (!s.ok()) {
      graph_ptr.release();
      return s;
    }
    graph = graph_ptr.release();

    // Passes may have rewritten the graph; refresh the name -> id index.
    RebuildNodeNameMap(*graph);

    Placer placer(graph, "", &graph->flib_def(), devices, nullptr,
                  allow_soft_placement, log_device_placement);
    return placer.Run();
  }

  // Convenience overloads: default device set, soft placement on, no logging.
  Status Place(Graph* graph, DeviceSet* devices) {
    return Place(graph, devices, nullptr, true, false);
  }

  Status Place(Graph* graph, bool allow_soft_placement,
               bool log_device_placement) {
    return Place(graph, &devices_, nullptr, allow_soft_placement,
                 log_device_placement);
  }

  Status Place(Graph* graph) {
    return Place(graph, &devices_, nullptr, true, false);
  }

  Status CallOptPassesAndPlace(Graph* graph, bool allow_soft_placement,
                               bool log_device_placement) {
    return CallOptPassesAndPlace(graph, &devices_, allow_soft_placement,
                                 log_device_placement);
  }

  Status CallOptPassesAndPlace(Graph* graph) {
    return CallOptPassesAndPlace(graph, &devices_, true, false);
  }

  // Returns the node in "graph" with the given name.
  //
  // REQUIRES: "graph" was produced by the most recent call to BuildGraph.
  Node* GetNodeByName(const Graph& graph, const string& name) {
    const auto search = nodes_by_name_.find(name);
    CHECK(search != nodes_by_name_.end()) << "Unknown node name: " << name;
    return graph.FindNodeId(search->second);
  }

 protected:
  std::vector<std::unique_ptr<Device>> local_devices_;  // owns the devices
  DeviceSet devices_;  // borrows pointers into local_devices_
  std::unordered_map<string, int> nodes_by_name_;  // node name -> node id

  // Shared helper for the reference-connection tests; declared here,
  // defined elsewhere in this file.
  Status ReferenceTestHelper(const string& variable_op_type,
                             const string& assign_op_type,
                             const DeviceType& expected_device_type);

 private:
  // Re-indexes every node of "graph" by name for GetNodeByName.
  void RebuildNodeNameMap(const Graph& graph) {
    nodes_by_name_.clear();
    for (Node* node : graph.nodes()) {
      nodes_by_name_[node->name()] = node->id();
    }
  }
};
366
// Fixture that adds a bool parameter for allow_soft_placement.
// Test cases that want to test behavior with and without soft placement
// can use this fixture instead of PlacerTest.
class SoftPlacementPlacerTest : public PlacerTest,
                                public ::testing::WithParamInterface<bool> {};

// Runs every SoftPlacementPlacerTest case once with soft placement off
// (false) and once with it on (true).
INSTANTIATE_TEST_SUITE_P(All, SoftPlacementPlacerTest,
                         ::testing::Values(false, true),
                         ::testing::PrintToStringParamName());
376
// Expects that two named nodes were assigned to the same device.
#define EXPECT_COLOCATED(g, name_a, name_b)                         \
  do {                                                              \
    Graph& g_ = (g);                                                \
    EXPECT_EQ(GetNodeByName(g_, (name_a))->assigned_device_name(),  \
              GetNodeByName(g_, (name_b))->assigned_device_name()); \
  } while (0)

// Expects that two named nodes were assigned to different devices.
#define EXPECT_NOT_COLOCATED(g, name_a, name_b)                     \
  do {                                                              \
    Graph& g_ = (g);                                                \
    EXPECT_NE(GetNodeByName(g_, (name_a))->assigned_device_name(),  \
              GetNodeByName(g_, (name_b))->assigned_device_name()); \
  } while (0)

// Expects that the named node's assigned device has the given device type.
#define EXPECT_DEVICE_TYPE(g, name, expected_device_type)               \
  EXPECT_EQ(DeviceType(expected_device_type).type(),                    \
            devices_                                                    \
                .FindDeviceByName(                                      \
                    GetNodeByName((g), (name))->assigned_device_name()) \
                ->attributes()                                          \
                .device_type())

// Expects that two named nodes were assigned devices of the same type
// (not necessarily the same device instance).
#define EXPECT_SAME_TYPE(g, node1, node2)                                \
  EXPECT_EQ(devices_                                                     \
                .FindDeviceByName(                                       \
                    GetNodeByName((g), (node1))->assigned_device_name()) \
                ->attributes()                                           \
                .device_type(),                                          \
            devices_                                                     \
                .FindDeviceByName(                                       \
                    GetNodeByName((g), (node2))->assigned_device_name()) \
                ->attributes()                                           \
                .device_type())

// Expects that the named node's assigned device name contains the substring.
#define EXPECT_DEVICE_CONTAINS(g, name, device_substr)      \
  EXPECT_TRUE(absl::StrContains(                            \
      GetNodeByName((g), (name))->assigned_device_name(), device_substr))
414
415 // Test that a graph with no constraints will successfully assign nodes to the
416 // "best available" device (i.e. prefer GPU over CPU).
TEST_F(PlacerTest, TestNoConstraints) {
  Graph graph(OpRegistry::Global());
  {
    // Builder scope: temporaries die once the graph is constructed.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    Node* in_node = ops::SourceOp("TestInput", builder.opts().WithName("in"));
    ops::UnaryOp("TestRelu", ops::NodeOut(in_node, 0),
                 builder.opts().WithName("n1"));
    ops::UnaryOp("TestRelu", ops::NodeOut(in_node, 1),
                 builder.opts().WithName("n2"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  TF_EXPECT_OK(Place(&graph));
  // "in" only has a CPU kernel; the Relus pick the preferred GPU.
  EXPECT_DEVICE_TYPE(graph, "in", "FakeCPU");
  EXPECT_DEVICE_TYPE(graph, "n1", "FakeGPU");
  EXPECT_DEVICE_TYPE(graph, "n2", "FakeGPU");
}
432
433 // Test that a graph with no constraints but using kernels that have a specified
434 // device priority will successfully assign nodes to the device with higher
435 // priority
TEST_F(PlacerTest, TestNoConstraintsWithPrioritizedKernels) {
  Graph graph(OpRegistry::Global());
  {
    // Builder scope: temporaries die once the graph is constructed.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    Node* in_node = ops::SourceOp("TestInput", builder.opts().WithName("in"));
    ops::UnaryOp("TestDatasetOp", ops::NodeOut(in_node, 0),
                 builder.opts().WithName("n1"));
    ops::UnaryOp("TestDatasetOp", ops::NodeOut(in_node, 1),
                 builder.opts().WithName("n2"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  TF_EXPECT_OK(Place(&graph));
  // TestDatasetOp's CPU kernel has the higher priority, so CPU wins.
  EXPECT_DEVICE_TYPE(graph, "in", "FakeCPU");
  EXPECT_DEVICE_TYPE(graph, "n1", "FakeCPU");
  EXPECT_DEVICE_TYPE(graph, "n2", "FakeCPU");
}
453
454 // Test that if the node supports XLA_CPU and FakeCPU, it will be placed on
455 // XLA_CPU if and only if the node is assigned to the XLA_CPU device.
TEST_F(PlacerTest, TestXlaOpPlacement) {
  Graph graph(OpRegistry::Global());
  {
    // Builder scope: temporaries die once the graph is constructed.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    Node* in_node = ops::SourceOp("TestInput", builder.opts().WithName("in"));
    ops::UnaryOp("TestXlaOp", ops::NodeOut(in_node, 0),
                 builder.opts().WithName("n1"));
    ops::UnaryOp("TestXlaOp", ops::NodeOut(in_node, 1),
                 builder.opts().WithName("n2"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  // Pre-assign n2 to the XLA device; n1 is left unassigned.
  GetNodeByName(graph, "n2")->set_assigned_device_name(
      "/job:a/replica:0/task:0/device:XLA_CPU:0");

  TF_EXPECT_OK(Place(&graph));
  EXPECT_DEVICE_TYPE(graph, "in", "FakeCPU");
  // Unassigned n1 ignores the higher-priority XLA_CPU kernel...
  EXPECT_DEVICE_TYPE(graph, "n1", "FakeCPU");
  // ...while pre-assigned n2 keeps its XLA_CPU placement.
  EXPECT_DEVICE_TYPE(graph, "n2", "XLA_CPU");
}
478
TEST_F(PlacerTest, TestGPUInputIntoPrioritizedKernel) {
  Graph graph(OpRegistry::Global());
  {
    // Builder scope: temporaries die once the graph is constructed.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    Node* in_node =
        ops::SourceOp("TestGPUOutput", builder.opts().WithName("in"));
    ops::UnaryOp("TestDatasetOp", ops::NodeOut(in_node, 0),
                 builder.opts().WithName("n1"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  TF_EXPECT_OK(Place(&graph));
  // The GPU-only producer does not drag the CPU-prioritized consumer along.
  EXPECT_DEVICE_TYPE(graph, "in", "FakeGPU");
  EXPECT_DEVICE_TYPE(graph, "n1", "FakeCPU");
}
494
495 // Tests that a GPU kernel colocated with prioritized kernel respects it.
TEST_F(PlacerTest, TestGPUInputColocatedWithPrioritizedKernel) {
  Graph graph(OpRegistry::Global());
  {
    // Builder scope: temporaries die once the graph is constructed.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    Node* in_node =
        ops::SourceOp("TestGPUOutput", builder.opts().WithName("in"));
    // n1 is explicitly colocated with "in"; n2 is left free.
    ops::UnaryOp("TestDatasetOp", ops::NodeOut(in_node, 0),
                 builder.opts().WithName("n1").WithAttr("_class", {"loc:@in"}));
    ops::UnaryOp("TestDatasetOp", ops::NodeOut(in_node, 0),
                 builder.opts().WithName("n2"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  TF_EXPECT_OK(Place(&graph));
  EXPECT_DEVICE_TYPE(graph, "in", "FakeGPU");
  // The colocation constraint overrides n1's CPU kernel priority;
  // unconstrained n2 still follows the priority to CPU.
  EXPECT_DEVICE_TYPE(graph, "n1", "FakeGPU");
  EXPECT_DEVICE_TYPE(graph, "n2", "FakeCPU");
}
516
// Resource producer/consumer ops for the priority tests below. Suffixes:
//   CPU/GPU — kernel on only that device type;
//   SP — prioritized kernels, CPU (2) outranks GPU (1);
//   RP — reversed priorities, GPU (2) outranks CPU (1);
//   NP — kernels on both types, no priorities.
REGISTER_OP("CreateDatasetCPU").Output("o: resource");
REGISTER_KERNEL_BUILDER(Name("CreateDatasetCPU").Device("FakeCPU"), DummyOp);
REGISTER_OP("CreateDatasetGPU").Output("o: resource");
REGISTER_KERNEL_BUILDER(Name("CreateDatasetGPU").Device("FakeGPU"), DummyOp);

REGISTER_OP("CreateDatasetSP").Output("o: resource");
REGISTER_KERNEL_BUILDER(Name("CreateDatasetSP").Device("FakeCPU").Priority(2),
                        DummyOp);
REGISTER_KERNEL_BUILDER(Name("CreateDatasetSP").Device("FakeGPU").Priority(1),
                        DummyOp);

REGISTER_OP("CreateDatasetRP").Output("o: resource");
REGISTER_KERNEL_BUILDER(Name("CreateDatasetRP").Device("FakeCPU").Priority(1),
                        DummyOp);
REGISTER_KERNEL_BUILDER(Name("CreateDatasetRP").Device("FakeGPU").Priority(2),
                        DummyOp);

REGISTER_OP("CreateDatasetNP").Output("o: resource");
REGISTER_KERNEL_BUILDER(Name("CreateDatasetNP").Device("FakeCPU"), DummyOp);
REGISTER_KERNEL_BUILDER(Name("CreateDatasetNP").Device("FakeGPU"), DummyOp);

REGISTER_OP("IteratorNP").Input("i: resource").Output("o: float");
REGISTER_KERNEL_BUILDER(Name("IteratorNP").Device("FakeCPU"), DummyOp);
REGISTER_KERNEL_BUILDER(Name("IteratorNP").Device("FakeGPU"), DummyOp);

REGISTER_OP("IteratorSP").Input("i: resource").Output("o: float");
REGISTER_KERNEL_BUILDER(Name("IteratorSP").Device("FakeCPU").Priority(2),
                        DummyOp);
REGISTER_KERNEL_BUILDER(Name("IteratorSP").Device("FakeGPU").Priority(1),
                        DummyOp);

REGISTER_OP("IteratorRP").Input("i: resource").Output("o: float");
REGISTER_KERNEL_BUILDER(Name("IteratorRP").Device("FakeCPU").Priority(1),
                        DummyOp);
REGISTER_KERNEL_BUILDER(Name("IteratorRP").Device("FakeGPU").Priority(2),
                        DummyOp);

REGISTER_OP("IteratorGPU").Input("i: resource").Output("o: float");
REGISTER_KERNEL_BUILDER(Name("IteratorGPU").Device("FakeGPU"), DummyOp);
556
557 // Test reference edges with one node having prioritized kernels and the other
558 // has no preference. We should respect priority here.
TEST_F(PlacerTest, TestDSWithPriority) {
  Graph graph(OpRegistry::Global());
  {
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    Node* dataset =
        ops::SourceOp("CreateDatasetSP", builder.opts().WithName("ds"));
    ops::UnaryOp("IteratorNP", ops::NodeOut(dataset, 0),
                 builder.opts().WithName("it"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }
  TF_EXPECT_OK(Place(&graph));
  // The dataset's CPU-priority kernels win; the iterator follows.
  EXPECT_DEVICE_TYPE(graph, "ds", "FakeCPU");
  EXPECT_DEVICE_TYPE(graph, "it", "FakeCPU");
}
571
572 // Test reference edges with one node having kernels with regular priority and
573 // the other has no preference. We should respect priority here.
TEST_F(PlacerTest, TestDSWithGPUPriority) {
  Graph graph(OpRegistry::Global());
  {
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    Node* dataset =
        ops::SourceOp("CreateDatasetRP", builder.opts().WithName("ds"));
    ops::UnaryOp("IteratorNP", ops::NodeOut(dataset, 0),
                 builder.opts().WithName("it"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }
  TF_EXPECT_OK(Place(&graph));
  // The dataset's GPU-priority kernels win; the iterator follows.
  EXPECT_DEVICE_TYPE(graph, "ds", "FakeGPU");
  EXPECT_DEVICE_TYPE(graph, "it", "FakeGPU");
}
586
587 // Test reference edges with one node having prioritized kernels and the other
588 // has no preference. We should respect priority here.
TEST_F(PlacerTest, TestITWithPriority) {
  Graph graph(OpRegistry::Global());
  {
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    Node* dataset =
        ops::SourceOp("CreateDatasetNP", builder.opts().WithName("ds"));
    ops::UnaryOp("IteratorSP", ops::NodeOut(dataset, 0),
                 builder.opts().WithName("it"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }
  TF_EXPECT_OK(Place(&graph));
  // This time the iterator carries the CPU priority; the dataset follows.
  EXPECT_DEVICE_TYPE(graph, "ds", "FakeCPU");
  EXPECT_DEVICE_TYPE(graph, "it", "FakeCPU");
}
601
602 // Test reference edges with one node having kernels with regular priority and
603 // the other has no preference. We should respect priority here.
TEST_F(PlacerTest, TestITWithGPUPriority) {
  Graph graph(OpRegistry::Global());
  {
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    Node* dataset =
        ops::SourceOp("CreateDatasetNP", builder.opts().WithName("ds"));
    ops::UnaryOp("IteratorRP", ops::NodeOut(dataset, 0),
                 builder.opts().WithName("it"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }
  TF_EXPECT_OK(Place(&graph));
  // The iterator carries the GPU priority; the dataset follows.
  EXPECT_DEVICE_TYPE(graph, "ds", "FakeGPU");
  EXPECT_DEVICE_TYPE(graph, "it", "FakeGPU");
}
616
617 // Test reference edges with one node having prioritized kernels and other node
618 // can only be placed on GPU. We should respect the constraint then.
TEST_F(PlacerTest, TestITGPU) {
  Graph graph(OpRegistry::Global());
  {
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    Node* dataset =
        ops::SourceOp("CreateDatasetSP", builder.opts().WithName("ds"));
    ops::UnaryOp("IteratorGPU", ops::NodeOut(dataset, 0),
                 builder.opts().WithName("it"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }
  TF_EXPECT_OK(Place(&graph));
  // The GPU-only iterator overrides the dataset's CPU priority.
  EXPECT_DEVICE_TYPE(graph, "ds", "FakeGPU");
  EXPECT_DEVICE_TYPE(graph, "it", "FakeGPU");
}
631
632 // Test reference edges with one node having prioritized kernels and other node
633 // can only be placed on CPU. We should respect the constraint then.
TEST_F(PlacerTest, TestSimpleIteratorOnlyGPU) {
  Graph graph(OpRegistry::Global());
  {
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    Node* dataset =
        ops::SourceOp("CreateDatasetCPU", builder.opts().WithName("ds"));
    ops::UnaryOp("IteratorRP", ops::NodeOut(dataset, 0),
                 builder.opts().WithName("it"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }
  TF_EXPECT_OK(Place(&graph));
  // The CPU-only dataset overrides the iterator's GPU priority.
  EXPECT_DEVICE_TYPE(graph, "ds", "FakeCPU");
  EXPECT_DEVICE_TYPE(graph, "it", "FakeCPU");
}
646
647 // Test constraints with agreeing priorities.
TEST_F(PlacerTest, TestAgreeingPriorities) {
  Graph graph(OpRegistry::Global());
  {
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    Node* dataset =
        ops::SourceOp("CreateDatasetSP", builder.opts().WithName("ds"));
    ops::UnaryOp("IteratorSP", ops::NodeOut(dataset, 0),
                 builder.opts().WithName("it"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }
  TF_EXPECT_OK(Place(&graph));
  // Both ops agree that CPU has the higher priority.
  EXPECT_DEVICE_TYPE(graph, "ds", "FakeCPU");
  EXPECT_DEVICE_TYPE(graph, "it", "FakeCPU");
}
660
661 // Test constraints with agreeing regular priorities.
TEST_F(PlacerTest, TestAgreeingRegularPriorities) {
  Graph graph(OpRegistry::Global());
  {
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    Node* dataset =
        ops::SourceOp("CreateDatasetRP", builder.opts().WithName("ds"));
    ops::UnaryOp("IteratorRP", ops::NodeOut(dataset, 0),
                 builder.opts().WithName("it"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }
  TF_EXPECT_OK(Place(&graph));
  // Both ops agree that GPU has the higher priority.
  EXPECT_DEVICE_TYPE(graph, "ds", "FakeGPU");
  EXPECT_DEVICE_TYPE(graph, "it", "FakeGPU");
}
674
675 // Test constraints with different priorities. In this case, we should bail
676 // and just revert to default.
TEST_F(PlacerTest, TestConflictingPriorities) {
  Graph graph(OpRegistry::Global());
  {
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    Node* dataset =
        ops::SourceOp("CreateDatasetSP", builder.opts().WithName("ds"));
    ops::UnaryOp("IteratorRP", ops::NodeOut(dataset, 0),
                 builder.opts().WithName("it"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }
  TF_EXPECT_OK(Place(&graph));
  // Conflicting priorities are ignored; the default (GPU) preference wins.
  EXPECT_DEVICE_TYPE(graph, "ds", "FakeGPU");
  EXPECT_DEVICE_TYPE(graph, "it", "FakeGPU");
}
689
690 // Test constraints with different priorities. In this case, we should bail
691 // and just revert to default.
TEST_F(PlacerTest, TestConflictingPrioritiesReversed) {
  Graph graph(OpRegistry::Global());
  {
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    Node* dataset =
        ops::SourceOp("CreateDatasetRP", builder.opts().WithName("ds"));
    ops::UnaryOp("IteratorSP", ops::NodeOut(dataset, 0),
                 builder.opts().WithName("it"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }
  TF_EXPECT_OK(Place(&graph));
  // Same conflict with the roles swapped; the default (GPU) still wins.
  EXPECT_DEVICE_TYPE(graph, "ds", "FakeGPU");
  EXPECT_DEVICE_TYPE(graph, "it", "FakeGPU");
}
704
705 // Test that a graph with device type and reference constraints on
706 // some of the ops will successfully assign nodes to the constrained
707 // device, and colocate nodes with reference connections.
TEST_F(PlacerTest, TestDeviceTypeConstraints) {
  Graph graph(OpRegistry::Global());
  {
    // Builder scope: temporaries die once the graph is constructed.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    Node* in_node = ops::SourceOp("TestInput", builder.opts().WithName("in"));
    Node* cpu_var =
        ops::SourceOp("VariableCPU", builder.opts().WithName("var_cpu"));
    ops::BinaryOp("AssignCPU", cpu_var, in_node,
                  builder.opts().WithName("assign_cpu"));
    Node* gpu_var =
        ops::SourceOp("VariableGPU", builder.opts().WithName("var_gpu"));
    ops::BinaryOp("AssignGPU", gpu_var, in_node,
                  builder.opts().WithName("assign_gpu"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  TF_EXPECT_OK(Place(&graph));
  EXPECT_DEVICE_TYPE(graph, "in", "FakeCPU");
  // Each variable sticks to its only supported device type, and each
  // assign (a reference connection) is colocated with its variable.
  EXPECT_DEVICE_TYPE(graph, "var_cpu", "FakeCPU");
  EXPECT_DEVICE_TYPE(graph, "assign_cpu", "FakeCPU");
  EXPECT_COLOCATED(graph, "var_cpu", "assign_cpu");
  EXPECT_DEVICE_TYPE(graph, "var_gpu", "FakeGPU");
  EXPECT_DEVICE_TYPE(graph, "assign_gpu", "FakeGPU");
  EXPECT_COLOCATED(graph, "var_gpu", "assign_gpu");
}
729
// Test that a metadata op (Shape) is colocated with its input rather
// than being placed on its default (GPU-capable) device.
TEST_F(PlacerTest, TestMetadataColocatedWithInput) {
  Graph g(OpRegistry::Global());
  {  // Scope for temporary variables used to construct g.
    GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
    Node* var_cpu = ops::SourceOp("VariableCPU", b.opts().WithName("var_cpu"));

    // Normally, shape has a GPU implementation and would be placed
    // on GPU. However, because it is a metadata operation, it is
    // placed on CPU to avoid transferring the data from CPU to GPU.
    ops::UnaryOp("Shape", var_cpu, b.opts().WithName("shape_op"));
    TF_EXPECT_OK(BuildGraph(b, &g));
  }

  TF_EXPECT_OK(Place(&g));
  EXPECT_DEVICE_TYPE(g, "var_cpu", "FakeCPU");
  EXPECT_DEVICE_TYPE(g, "shape_op", "FakeCPU");
  EXPECT_COLOCATED(g, "var_cpu", "shape_op");
}
748
749 // Heuristic A implements "Island fusing": if a node only generates
750 // an output and it has only one consumer, we place the node
751 // with its consumer.
TEST_F(PlacerTest,TestHeuristicGeneratorFollowsSingleConsumer)752 TEST_F(PlacerTest, TestHeuristicGeneratorFollowsSingleConsumer) {
753 Graph g(OpRegistry::Global());
754 { // Scope for temporary variables used to construct g.
755 GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
756
757 // A variable is only on CPU
758 Node* var_cpu = ops::SourceOp("VariableCPU", b.opts().WithName("var_cpu"));
759
760 // The constant to be assigned can be on both GPU or CPU.
761 //
762 // Because of the heuristic, it gets placed on CPU to avoid a
763 // copy.
764 Node* input = ops::SourceOp("TestCPUGPUOutput", b.opts().WithName("in"));
765
766 // The assign is bound to CPU by the reference edge.
767 ops::BinaryOp("TestAssign", var_cpu, input, b.opts().WithName("assign"));
768
769 TF_EXPECT_OK(BuildGraph(b, &g));
770 }
771
772 TF_EXPECT_OK(Place(&g));
773 EXPECT_COLOCATED(g, "var_cpu", "in");
774 EXPECT_COLOCATED(g, "assign", "in");
775 }
776
// Test that the generator-follows-consumer heuristic is skipped when the
// consumer's device is not a valid device for the generator.
TEST_F(PlacerTest, TestIgnoreGeneratorHeuristicIfWrongDevice) {
  Graph g(OpRegistry::Global());
  {  // Scope for temporary variables used to construct g.
    GraphDefBuilder b(GraphDefBuilder::kFailImmediately);

    // A variable is only on CPU
    Node* var_cpu = ops::SourceOp("VariableCPU", b.opts().WithName("var_cpu"));

    // The constant to be assigned can only be on GPU.
    //
    // The heuristic to place the generator with its consumer does
    // not apply since the consumer's device is not in the list
    // of valid devices for the generator.
    Node* input = ops::SourceOp("TestGPUOutput", b.opts().WithName("in"));

    // The assign is bound to CPU by the reference edge.
    ops::BinaryOp("TestAssign", var_cpu, input, b.opts().WithName("assign"));

    TF_EXPECT_OK(BuildGraph(b, &g));
  }

  TF_EXPECT_OK(Place(&g));
  EXPECT_DEVICE_TYPE(g, "in", "FakeGPU");
  EXPECT_DEVICE_TYPE(g, "var_cpu", "FakeCPU");
  EXPECT_COLOCATED(g, "var_cpu", "assign");
}
803
// Test that the generator-follows-consumer heuristic is skipped when the
// generator carries an explicit partial device spec that conflicts with
// the consumer's device.
TEST_F(PlacerTest, TestIgnoreGeneratorHeuristicIfWrongPartialDevice) {
  Graph g(OpRegistry::Global());
  {  // Scope for temporary variables used to construct g.
    GraphDefBuilder b(GraphDefBuilder::kFailImmediately);

    // A variable is only on CPU
    Node* var_cpu = ops::SourceOp("VariableCPU", b.opts().WithName("var_cpu"));

    // The constant to be assigned can be on CPU or GPU, but is explicitly
    // placed on CPU:1.
    //
    // The heuristic to place the generator with its consumer does
    // not apply since the consumer's device is not in the list
    // of valid devices for the generator.
    Node* input =
        ops::SourceOp("TestCPUGPUOutput",
                      b.opts().WithName("in").WithDevice("/device:FakeCPU:1"));

    // The assign is bound to CPU by the reference edge.
    ops::BinaryOp("TestAssign", var_cpu, input, b.opts().WithName("assign"));

    TF_EXPECT_OK(BuildGraph(b, &g));
  }

  TF_EXPECT_OK(Place(&g));
  EXPECT_DEVICE_TYPE(g, "in", "FakeCPU");
  EXPECT_DEVICE_CONTAINS(g, "in", "/device:FakeCPU:1");
  EXPECT_DEVICE_TYPE(g, "var_cpu", "FakeCPU");
  EXPECT_COLOCATED(g, "var_cpu", "assign");
  EXPECT_DEVICE_CONTAINS(g, "var_cpu", "/device:FakeCPU:0");
}
835
836 // Test that a graph with partial device specifications on the ops
837 // will successfully
TEST_F(PlacerTest,TestPartialSpec)838 TEST_F(PlacerTest, TestPartialSpec) {
839 Graph g(OpRegistry::Global());
840 { // Scope for temporary variables used to construct g.
841 GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
842 ops::SourceOp("TestInput", b.opts().WithName("in").WithDevice("/job:a"));
843 ops::SourceOp("TestVariable",
844 b.opts().WithName("var").WithDevice("/job:a"));
845 TF_EXPECT_OK(BuildGraph(b, &g));
846 }
847
848 TF_EXPECT_OK(Place(&g));
849 EXPECT_DEVICE_TYPE(g, "in", "FakeCPU");
850 EXPECT_DEVICE_CONTAINS(g, "in", "/job:a");
851 EXPECT_DEVICE_TYPE(g, "var", "FakeGPU");
852 EXPECT_DEVICE_CONTAINS(g, "var", "/job:a");
853 }
854
855 // Test that a node with a pre-assigned device is not relocated.
TEST_F(PlacerTest,TestAssignedDevicePreserved)856 TEST_F(PlacerTest, TestAssignedDevicePreserved) {
857 Graph g(OpRegistry::Global());
858 { // Scope for temporary variables used to construct g.
859 GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
860 ops::SourceOp("TestInput", b.opts().WithName("in"));
861 TF_EXPECT_OK(BuildGraph(b, &g));
862 }
863
864 GetNodeByName(g, "in")->set_assigned_device_name(
865 "/job:a/replica:0/task:0/device:FakeCPU:7");
866
867 TF_EXPECT_OK(Place(&g));
868 EXPECT_EQ("/job:a/replica:0/task:0/device:FakeCPU:7",
869 GetNodeByName(g, "in")->assigned_device_name());
870 }
871
872 // Test that a graph with partial device specifications for CPU-only ops
873 // will be relocated to CPU.
TEST_F(PlacerTest,TestPartialSpecGpuToCpu)874 TEST_F(PlacerTest, TestPartialSpecGpuToCpu) {
875 Graph g(OpRegistry::Global());
876 { // Scope for temporary variables used to construct g.
877 GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
878 ops::SourceOp("TestInput",
879 b.opts().WithName("in").WithDevice("/device:FakeGPU:0"));
880 ops::SourceOp("TestVariable",
881 b.opts().WithName("var").WithDevice("/device:FakeGPU:0"));
882 TF_EXPECT_OK(BuildGraph(b, &g));
883 }
884
885 TF_EXPECT_OK(Place(&g, true, false));
886 EXPECT_DEVICE_TYPE(g, "in", "FakeCPU");
887 EXPECT_DEVICE_CONTAINS(g, "in", "/device:FakeCPU");
888 EXPECT_DEVICE_TYPE(g, "var", "FakeGPU");
889 EXPECT_DEVICE_CONTAINS(g, "var", "/device:FakeGPU:0");
890 }
891
892 // Test that a resource with requested device will be moved to another
893 // device if it is processed by an op that is not supported on requested device.
TEST_F(PlacerTest,TestResourceMove)894 TEST_F(PlacerTest, TestResourceMove) {
895 Graph g(OpRegistry::Global());
896 {
897 GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
898 Node* ds =
899 ops::SourceOp("CreateDatasetSP",
900 b.opts().WithName("ds").WithDevice("/device:FakeCPU:0"));
901 ops::UnaryOp("IteratorGPU", ops::NodeOut(ds, 0), b.opts().WithName("it"));
902 TF_EXPECT_OK(BuildGraph(b, &g));
903 }
904 TF_EXPECT_OK(Place(&g));
905 EXPECT_DEVICE_TYPE(g, "ds", "FakeGPU");
906 EXPECT_DEVICE_TYPE(g, "it", "FakeGPU");
907 }
908
909 // Test that a node with an assigned GPU device but has not registered
910 // OpKernel will fail.
TEST_F(PlacerTest,TestAssignedGpuDeviceToCpuDevice)911 TEST_F(PlacerTest, TestAssignedGpuDeviceToCpuDevice) {
912 Graph g(OpRegistry::Global());
913 { // Scope for temporary variables used to construct g.
914 GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
915 ops::SourceOp("TestInput", b.opts().WithName("in"));
916 TF_EXPECT_OK(BuildGraph(b, &g));
917 }
918
919 GetNodeByName(g, "in")->set_assigned_device_name(
920 "/job:a/replica:0/task:0/device:FakeGPU:0");
921
922 Status s = Place(&g);
923 EXPECT_EQ(error::INTERNAL, s.code()) << s.ToString();
924 EXPECT_TRUE(absl::StrContains(
925 s.error_message(),
926 "Assigned device '/job:a/replica:0/task:0/device:FakeGPU:0' "
927 "does not have registered OpKernel support for TestInput"))
928 << s.ToString();
929 }
930
931 // Test that graphs with reference connections are correctly placed.
932
933 // Build a graph containing a Variable op of "variable_op_type" and an
934 // Assign op of "assign_op_type", and expect all of the ops to be
935 // placed on a device of type "expected_device_type".
ReferenceTestHelper(const string & variable_op_type,const string & assign_op_type,const DeviceType & expected_device_type)936 Status PlacerTest::ReferenceTestHelper(const string& variable_op_type,
937 const string& assign_op_type,
938 const DeviceType& expected_device_type) {
939 Graph g(OpRegistry::Global());
940 { // Scope for temporary variables used to construct g.
941 GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
942 Node* input = ops::SourceOp("TestInput", b.opts().WithName("in"));
943 // Build ten variable-and-assignment pairs.
944 for (int i = 0; i < 10; ++i) {
945 Node* var = ops::SourceOp(variable_op_type,
946 b.opts().WithName(strings::StrCat("var_", i)));
947 ops::BinaryOp(assign_op_type, var, input,
948 b.opts().WithName(strings::StrCat("assign_", i)));
949 }
950 TF_EXPECT_OK(BuildGraph(b, &g));
951 }
952
953 TF_RETURN_IF_ERROR(Place(&g));
954
955 for (int i = 0; i < 10; ++i) {
956 EXPECT_COLOCATED(g, strings::StrCat("var_", i),
957 strings::StrCat("assign_", i));
958 EXPECT_DEVICE_TYPE(g, strings::StrCat("var_", i), expected_device_type);
959 EXPECT_DEVICE_TYPE(g, strings::StrCat("assign_", i), expected_device_type);
960 }
961
962 return OkStatus();
963 }
964
965 // Test all 2^3 combinations of Variable and Assignment op types
966 // (unconstrained, CPU-only, and GPU-only).
TEST_F(PlacerTest,TestReferenceConnection)967 TEST_F(PlacerTest, TestReferenceConnection) {
968 Status s;
969 TF_EXPECT_OK(ReferenceTestHelper("TestVariable", "TestAssign", "FakeGPU"));
970 TF_EXPECT_OK(ReferenceTestHelper("TestVariable", "AssignCPU", "FakeCPU"));
971 TF_EXPECT_OK(ReferenceTestHelper("TestVariable", "AssignGPU", "FakeGPU"));
972 TF_EXPECT_OK(ReferenceTestHelper("VariableCPU", "TestAssign", "FakeCPU"));
973 TF_EXPECT_OK(ReferenceTestHelper("VariableCPU", "AssignCPU", "FakeCPU"));
974 {
975 Status s = ReferenceTestHelper("VariableCPU", "AssignGPU", "FakeCPU");
976 EXPECT_EQ(error::INVALID_ARGUMENT, s.code());
977 EXPECT_TRUE(absl::StrContains(
978 s.error_message(), "no device type supports both of those nodes"));
979 }
980 TF_EXPECT_OK(ReferenceTestHelper("VariableGPU", "TestAssign", "FakeGPU"));
981 {
982 Status s = ReferenceTestHelper("VariableGPU", "AssignCPU", "FakeCPU");
983 EXPECT_EQ(error::INVALID_ARGUMENT, s.code());
984 EXPECT_TRUE(absl::StrContains(
985 s.error_message(), "no device type supports both of those nodes"));
986 }
987 TF_EXPECT_OK(ReferenceTestHelper("VariableGPU", "AssignGPU", "FakeGPU"));
988 }
989
990 // Handle-using dummy variable ops.
991 REGISTER_OP("TestHandleVariable").Output("o: resource");
992 REGISTER_KERNEL_BUILDER(Name("TestHandleVariable").Device("FakeCPU"), DummyOp);
993 REGISTER_KERNEL_BUILDER(Name("TestHandleVariable").Device("FakeGPU"), DummyOp);
994
995 REGISTER_OP("HandleVariableCPU").Output("o: resource");
996 REGISTER_KERNEL_BUILDER(Name("HandleVariableCPU").Device("FakeCPU"), DummyOp);
997
998 REGISTER_OP("HandleVariableGPU").Output("o: resource");
999 REGISTER_KERNEL_BUILDER(Name("HandleVariableGPU").Device("FakeGPU"), DummyOp);
1000
1001 REGISTER_OP("TestHandleAssign").Input("i: resource").Input("v: float");
1002 REGISTER_KERNEL_BUILDER(Name("TestHandleAssign").Device("FakeCPU"), DummyOp);
1003 REGISTER_KERNEL_BUILDER(Name("TestHandleAssign").Device("FakeGPU"), DummyOp);
1004
1005 REGISTER_OP("HandleAssignCPU").Input("i: resource").Input("v: float");
1006 REGISTER_KERNEL_BUILDER(Name("HandleAssignCPU").Device("FakeCPU"), DummyOp);
1007
1008 REGISTER_OP("HandleAssignGPU").Input("i: resource").Input("v: float");
1009 REGISTER_KERNEL_BUILDER(Name("HandleAssignGPU").Device("FakeGPU"), DummyOp);
1010
1011 REGISTER_OP("TestTwoHandlesIn").Input("i: resource").Input("j: resource");
1012 REGISTER_KERNEL_BUILDER(Name("TestTwoHandlesIn").Device("FakeCPU"), DummyOp);
1013 REGISTER_KERNEL_BUILDER(Name("TestTwoHandlesIn").Device("FakeGPU"), DummyOp);
1014
1015 // Tests all combinations of resource handles and ops using them.
TEST_F(PlacerTest,TestResourceHandle)1016 TEST_F(PlacerTest, TestResourceHandle) {
1017 auto handle_test = [this](const string& var_op_name,
1018 const string& use_op_name, DeviceType device) {
1019 Graph g(OpRegistry::Global());
1020 { // Scope for temporary variables used to construct g.
1021 GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
1022 Node* input = ops::SourceOp("TestInput", b.opts().WithName("in"));
1023 Node* var = ops::SourceOp(var_op_name, b.opts().WithName("var"));
1024 ops::BinaryOp(use_op_name, var, input, b.opts().WithName("assign"));
1025 TF_EXPECT_OK(BuildGraph(b, &g));
1026 }
1027
1028 TF_RETURN_IF_ERROR(Place(&g));
1029
1030 EXPECT_COLOCATED(g, "var", "assign");
1031 EXPECT_DEVICE_TYPE(g, "var", device);
1032 EXPECT_DEVICE_TYPE(g, "assign", device);
1033 return OkStatus();
1034 };
1035 TF_EXPECT_OK(
1036 handle_test("TestHandleVariable", "TestHandleAssign", "FakeGPU"));
1037 TF_EXPECT_OK(handle_test("TestHandleVariable", "HandleAssignCPU", "FakeCPU"));
1038 TF_EXPECT_OK(handle_test("TestHandleVariable", "HandleAssignGPU", "FakeGPU"));
1039 TF_EXPECT_OK(handle_test("HandleVariableCPU", "TestHandleAssign", "FakeCPU"));
1040 TF_EXPECT_OK(handle_test("HandleVariableCPU", "HandleAssignCPU", "FakeCPU"));
1041 TF_EXPECT_OK(handle_test("HandleVariableGPU", "HandleAssignGPU", "FakeGPU"));
1042 TF_EXPECT_OK(handle_test("HandleVariableGPU", "TestHandleAssign", "FakeGPU"));
1043 EXPECT_FALSE(
1044 handle_test("HandleVariableGPU", "HandleAssignCPU", "FakeCPU").ok());
1045 EXPECT_FALSE(
1046 handle_test("HandleVariableCPU", "HandleAssignGPU", "FakeCPU").ok());
1047 }
1048
// Test that consuming two resource handles that live on different devices
// fails, regardless of soft placement, for both assigned and requested
// devices.
TEST_F(PlacerTest, TestResourceHandlesOnDifferentDevicesFails) {
  auto handle_test = [this](bool allow_soft_placement, bool set_assigned) {
    Graph g(OpRegistry::Global());
    {  // Scope for temporary variables used to construct g.
      GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
      Node* var_cpu =
          ops::SourceOp("TestHandleVariable", b.opts().WithName("var_cpu"));
      Node* var_gpu =
          ops::SourceOp("TestHandleVariable", b.opts().WithName("var_gpu"));
      ops::BinaryOp("TestTwoHandlesIn", var_cpu, var_gpu,
                    b.opts().WithName("two_handles_in"));
      TF_EXPECT_OK(BuildGraph(b, &g));

      // Pin the two variables to different devices, either via assigned
      // or requested device names.
      if (set_assigned) {
        GetNodeByName(g, "var_cpu")
            ->set_assigned_device_name(
                "/job:a/replica:0/task:0/device:FakeCPU:0");
        GetNodeByName(g, "var_gpu")
            ->set_assigned_device_name(
                "/job:a/replica:0/task:0/device:FakeGPU:0");
      } else {
        GetNodeByName(g, "var_cpu")
            ->set_requested_device("/job:a/replica:0/task:0/device:FakeCPU:0");
        GetNodeByName(g, "var_gpu")
            ->set_requested_device("/job:a/replica:0/task:0/device:FakeGPU:0");
      }
    }

    Status s = Place(&g, allow_soft_placement, true);
    EXPECT_EQ(error::INVALID_ARGUMENT, s.code()) << s.ToString();
    if (set_assigned) {
      EXPECT_TRUE(absl::StrContains(
          s.error_message(),
          "Cannot place the graph because a reference or resource edge "
          "connects "
          "colocation groups with incompatible assigned devices: "
          "/job:a/replica:0/task:0/device:FakeGPU:0 vs "
          "/job:a/replica:0/task:0/device:FakeCPU:0"))
          << s.ToString();
    } else {
      EXPECT_TRUE(absl::StrContains(
          s.error_message(),
          "Cannot place the graph because a reference or resource edge "
          "connects "
          "colocation groups with incompatible resource devices: "
          "/job:a/replica:0/task:0/device:FakeGPU:0 vs "
          "/job:a/replica:0/task:0/device:FakeCPU:0"))
          << s.ToString();
    }

    return OkStatus();
  };

  TF_EXPECT_OK(handle_test(false, false));
  TF_EXPECT_OK(handle_test(false, true));
  TF_EXPECT_OK(handle_test(true, false));
  TF_EXPECT_OK(handle_test(true, true));
}
1107
1108 // Test that an assignment of an operator to the wrong device
1109 // is ignored when it could never be satisfied (due to reference
1110 // edges, for example).
TEST_F(PlacerTest,TestReferenceConnectionIgnoreInfeasible)1111 TEST_F(PlacerTest, TestReferenceConnectionIgnoreInfeasible) {
1112 Status s;
1113 Graph g(OpRegistry::Global());
1114 {
1115 GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
1116 Node* input = ops::SourceOp(
1117 "TestDevice",
1118 b.opts().WithName("in").WithDevice("/job:a/task:0/device:FakeGPU:0"));
1119 Node* var =
1120 ops::SourceOp("TestVariable", b.opts().WithName("var_0").WithDevice(
1121 "/job:a/task:0/device:FakeGPU:0"));
1122
1123 // This op is specified on CPU, but in practice will be ignored,
1124 // because the reference edges forces it on GPU.
1125 ops::BinaryOp("TestAssign", var, input,
1126 b.opts().WithName("assign").WithDevice(
1127 "/job:a/task:0/device:FakeCPU:0"));
1128 TF_EXPECT_OK(BuildGraph(b, &g));
1129 }
1130
1131 s = Place(&g, false, false);
1132 TF_EXPECT_OK(s);
1133 EXPECT_DEVICE_TYPE(g, "var_0", "FakeGPU");
1134 EXPECT_DEVICE_TYPE(g, "assign", "FakeGPU");
1135 }
1136
1137 // Test that an assignment of an operator to the a more specified device
1138 // causes the device to maintain its more specific placement.
TEST_F(PlacerTest,TestReferenceConnectionMoreSpecificDestinationSourceWins)1139 TEST_F(PlacerTest, TestReferenceConnectionMoreSpecificDestinationSourceWins) {
1140 Status s;
1141 Graph g(OpRegistry::Global());
1142 {
1143 GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
1144 // Input can be on either device
1145 Node* input =
1146 ops::SourceOp("TestCPUGPUOutput",
1147 b.opts().WithName("in").WithDevice("/job:a/task:0"));
1148
1149 // Variable can be on either device
1150 Node* var = ops::SourceOp(
1151 "TestVariable", b.opts().WithName("var_0").WithDevice("/job:a/task:0"));
1152
1153 // This op is specified on CPU and is more specific than the variable.
1154 // Because the variable is less specified, the variable will be
1155 // assigned to CPU.
1156 ops::BinaryOp("TestAssign", var, input,
1157 b.opts().WithName("assign").WithDevice(
1158 "/job:a/task:0/device:FakeCPU:0"));
1159 TF_EXPECT_OK(BuildGraph(b, &g));
1160 }
1161
1162 s = Place(&g, false, false);
1163 TF_EXPECT_OK(s);
1164 EXPECT_DEVICE_TYPE(g, "var_0", "FakeCPU");
1165 EXPECT_DEVICE_TYPE(g, "assign", "FakeCPU");
1166 }
1167
1168 // A reference connection exists between a variable and an assign,
1169 // where the assign has a device but the variable does not. In this
1170 // case, the variable gets placed on the location of the assign
1171 // operation.
TEST_F(PlacerTest,TestReferenceConnectionNoSourceDevice)1172 TEST_F(PlacerTest, TestReferenceConnectionNoSourceDevice) {
1173 Status s;
1174 Graph g(OpRegistry::Global());
1175 {
1176 GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
1177 Node* input = ops::SourceOp(
1178 "TestDevice",
1179 b.opts().WithName("in").WithDevice("/job:a/task:0/device:FakeGPU:0"));
1180 Node* var = ops::SourceOp("TestVariable", b.opts().WithName("var_0"));
1181 ops::BinaryOp("TestAssign", var, input,
1182 b.opts().WithName("assign").WithDevice(
1183 "/job:a/task:0/device:FakeCPU:0"));
1184 TF_EXPECT_OK(BuildGraph(b, &g));
1185 }
1186
1187 s = Place(&g, false, false);
1188 TF_EXPECT_OK(s);
1189 EXPECT_DEVICE_TYPE(g, "var_0", "FakeCPU");
1190 EXPECT_DEVICE_TYPE(g, "assign", "FakeCPU");
1191 }
1192
// Test placement of an op consuming a resource handle whose variable is
// assigned to a COMPOSITE device.
TEST_F(PlacerTest, TestResourceHandleOnCompositeDevice) {
  auto build_graph = [this](Graph* g) -> Status {
    GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
    Node* input = ops::SourceOp("TestInput", b.opts().WithName("in"));
    // Build a single variable-and-assignment pair.
    Node* var = ops::SourceOp("HandleVariableCPU", b.opts().WithName("var"));
    ops::BinaryOp("TestHandleAssign", var, input, b.opts().WithName("assign"));
    TF_RETURN_IF_ERROR(BuildGraph(b, g));
    // `var` is assigned to COMPOSITE.
    GetNodeByName(*g, "var")->set_assigned_device_name(
        "/job:a/replica:0/task:0/device:COMPOSITE:0");
    return OkStatus();
  };

  {
    // `assign` is not assigned to any device: it follows `var`.
    Graph g(OpRegistry::Global());
    TF_ASSERT_OK(build_graph(&g));
    TF_ASSERT_OK(Place(&g));
    EXPECT_DEVICE_TYPE(g, "var", "COMPOSITE");
    EXPECT_DEVICE_TYPE(g, "assign", "COMPOSITE");
  }
  {
    // `assign` is assigned to FakeCPU: both assignments are preserved.
    Graph g(OpRegistry::Global());
    TF_ASSERT_OK(build_graph(&g));
    GetNodeByName(g, "assign")
        ->set_assigned_device_name("/job:a/replica:0/task:0/device:FakeCPU:0");
    TF_ASSERT_OK(Place(&g));
    EXPECT_DEVICE_TYPE(g, "var", "COMPOSITE");
    EXPECT_DEVICE_TYPE(g, "assign", "FakeCPU");
  }
}
1226
// Test that a "_class: loc:@..." colocation attribute is honored when the
// colocation is feasible, and that nodes without the attribute are free
// to be placed elsewhere.
TEST_F(PlacerTest, TestColocationGroup) {
  Graph g(OpRegistry::Global());
  {  // Scope for temporary variables used to construct g.
    GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
    Node* input = ops::SourceOp("TestInput", b.opts().WithName("in"));
    Node* colocated_with_input = ops::UnaryOp(
        "TestRelu", input,
        b.opts().WithName("colocated_1").WithAttr("_class", {"loc:@in"}));

    // This will not be colocated with the input because TestInput is
    // only available on CPU and TestRelu will default to GPU.
    Node* not_colocated_with_input =
        ops::UnaryOp("TestRelu", input, b.opts().WithName("foo"));
    CHECK(colocated_with_input);
    CHECK(not_colocated_with_input);
    TF_EXPECT_OK(BuildGraph(b, &g));
  }

  TF_EXPECT_OK(Place(&g));
  EXPECT_COLOCATED(g, "in", "colocated_1");
  EXPECT_NOT_COLOCATED(g, "in", "foo");
}
1249
// Test that a node may name several colocation groups at once and all are
// honored when they are mutually satisfiable.
TEST_F(PlacerTest, TestMultipleColocationGroups) {
  Graph g(OpRegistry::Global());
  {  // Scope for temporary variables used to construct g.
    GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
    Node* input = ops::SourceOp("TestInput", b.opts().WithName("in"));
    Node* colocated_with_input = ops::UnaryOp(
        "TestRelu", input,
        b.opts().WithName("colocated_1").WithAttr("_class", {"loc:@in"}));
    Node* colocated_with_input_and_other =
        ops::UnaryOp("TestRelu", input,
                     b.opts().WithName("foo").WithAttr(
                         "_class", {"loc:@in", "loc:@colocated_1"}));
    CHECK(colocated_with_input);
    CHECK(colocated_with_input_and_other);
    TF_EXPECT_OK(BuildGraph(b, &g));
  }

  TF_EXPECT_OK(Place(&g));
  EXPECT_COLOCATED(g, "in", "colocated_1");
  EXPECT_COLOCATED(g, "in", "foo");
}
1271
// Test that colocation constraints are transitive: colocating "foo" with
// "colocated_1" (which is itself colocated with "in") places all three
// together.
TEST_F(PlacerTest, TestChainColocation) {
  Graph g(OpRegistry::Global());
  {  // Scope for temporary variables used to construct g.
    GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
    Node* input = ops::SourceOp("TestInput", b.opts().WithName("in"));
    Node* colocated_with_input = ops::UnaryOp(
        "TestRelu", input,
        b.opts().WithName("colocated_1").WithAttr("_class", {"loc:@in"}));
    Node* colocated_with_colocated_1 = ops::UnaryOp(
        "TestRelu", input,
        b.opts().WithName("foo").WithAttr("_class", {"loc:@colocated_1"}));
    CHECK(colocated_with_input);
    CHECK(colocated_with_colocated_1);
    TF_EXPECT_OK(BuildGraph(b, &g));
  }

  TF_EXPECT_OK(Place(&g));
  EXPECT_COLOCATED(g, "in", "colocated_1");
  EXPECT_COLOCATED(g, "in", "foo");
}
1292
// Test colocation groups that cannot all be satisfied: with soft placement
// the conflicting node is split off; without it, placement fails.
TEST_P(SoftPlacementPlacerTest, TestInvalidMultipleColocationGroups) {
  Graph g(OpRegistry::Global());
  {  // Scope for temporary variables used to construct g.
    GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
    Node* input = ops::SourceOp("TestInput", b.opts().WithName("in"));
    Node* colocated_with_input = ops::UnaryOp(
        "ReluCPU", input,
        b.opts().WithName("colocated_1").WithAttr("_class", {"loc:@in"}));
    // ReluGPU cannot run on the CPU device required by "in"/"colocated_1".
    Node* colocated_with_input_and_other =
        ops::UnaryOp("ReluGPU", input,
                     b.opts().WithName("foo").WithAttr(
                         "_class", {"loc:@in", "loc:@colocated_1"}));
    CHECK(colocated_with_input);
    CHECK(colocated_with_input_and_other);
    TF_EXPECT_OK(BuildGraph(b, &g));
  }

  bool allow_soft_placement = GetParam();
  Status s = Place(&g, allow_soft_placement, true);
  if (allow_soft_placement) {
    EXPECT_EQ(error::OK, s.code()) << s.ToString();
    EXPECT_DEVICE_TYPE(g, "in", "FakeCPU");
    EXPECT_DEVICE_TYPE(g, "colocated_1", "FakeCPU");
    EXPECT_DEVICE_TYPE(g, "foo", "FakeGPU");
  } else {
    EXPECT_TRUE(absl::StrContains(
        s.error_message(),
        "Cannot colocate nodes {{colocation_node foo}} and "
        "{{colocation_node in}} because no device type supports both of those "
        "nodes and the other nodes colocated with them"))
        << s.ToString();
  }
}
1326
// Test that colocation attributes and reference connections interact
// correctly when the resulting constraints are satisfiable.
TEST_F(PlacerTest, TestColocationGroupWithReferenceConnections) {
  Graph g(OpRegistry::Global());
  {  // Scope for temporary variables used to construct g.
    GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
    Node* input = ops::SourceOp("TestInput", b.opts().WithName("in"));
    Node* var1 = ops::SourceOp("VariableCPU", b.opts().WithName("var1"));
    Node* var2 = ops::SourceOp("VariableCPU", b.opts().WithName("var2"));
    Node* var3 = ops::SourceOp(
        "VariableCPU",
        b.opts().WithName("var3").WithDevice("/device:COMPOSITE:0"));

    // Two assigns (reference connections) with two different
    // colocation groups. Because their colocation groups all map to the
    // same device, this is a valid assignment.
    ops::BinaryOp(
        "TestAssign", var1, input,
        b.opts().WithName("assign1").WithAttr("_class", {"loc:@var1"}));
    ops::BinaryOp(
        "TestAssign", var2, input,
        b.opts().WithName("assign2").WithAttr("_class", {"loc:@var2"}));
    ops::BinaryOp(
        "TestAssign", var3, input,
        b.opts().WithName("assign3").WithAttr("_class", {"loc:@var3"}));
    TF_EXPECT_OK(BuildGraph(b, &g));
  }

  TF_EXPECT_OK(Place(&g));
  EXPECT_DEVICE_TYPE(g, "in", "FakeCPU");
  EXPECT_COLOCATED(g, "in", "var1");
  EXPECT_COLOCATED(g, "in", "var2");
  EXPECT_COLOCATED(g, "var1", "assign2");
  EXPECT_COLOCATED(g, "var2", "assign1");
  EXPECT_DEVICE_TYPE(g, "var3", "COMPOSITE");
  EXPECT_COLOCATED(g, "var3", "assign3");
}
1362
// Test colocation attributes that conflict with reference-edge constraints:
// with soft placement the conflict is resolved; without it, placement fails.
TEST_P(SoftPlacementPlacerTest,
       TestColocationGroupWithUnsatisfiableReferenceConnections) {
  Graph g(OpRegistry::Global());
  {  // Scope for temporary variables used to construct g.
    GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
    Node* input = ops::SourceOp("TestInput", b.opts().WithName("in"));

    Node* var1 = ops::SourceOp("VariableCPU", b.opts().WithName("var1"));
    Node* var2 = ops::SourceOp("VariableCPU", b.opts().WithName("var2"));
    // Var 3 is on GPU
    Node* var3 = ops::SourceOp("VariableGPU", b.opts().WithName("var3"));

    // Two assigns (reference connections) with two different
    // colocation groups. Because their colocation groups all map to the
    // same device, this is a valid assignment.
    ops::BinaryOp(
        "TestAssign", var1, input,
        b.opts().WithName("assign1").WithAttr("_class", {"loc:@var1"}));
    ops::BinaryOp(
        "TestAssign", var2, input,
        b.opts().WithName("assign2").WithAttr("_class", {"loc:@var2"}));
    // Assign to var3, but try to use a colocation group that matches
    // the assign of var2. This should fail because assign2 must be on CPU
    // (it has a reference edge on var2), and assign3 must be on GPU,
    // hence the conflict.
    ops::BinaryOp(
        "TestAssign", var3, input,
        b.opts().WithName("assign3").WithAttr("_class", {"loc:@var2"}));
    TF_EXPECT_OK(BuildGraph(b, &g));
  }

  bool allow_soft_placement = GetParam();
  Status s = Place(&g, allow_soft_placement, true);
  if (allow_soft_placement) {
    EXPECT_EQ(error::OK, s.code()) << s.ToString();
  } else {
    EXPECT_EQ(error::INVALID_ARGUMENT, s.code()) << s.ToString();
    EXPECT_TRUE(absl::StrContains(
        s.error_message(),
        "Cannot colocate nodes {{colocation_node assign3}} and "
        "{{colocation_node var2}} because no device type supports both of "
        "those nodes and the other nodes colocated with them."))
        << s.ToString();
  }
}
1408
// Stress test combining explicit colocation attributes with reference
// connections across many variable/assign pairs.
TEST_F(PlacerTest, TestColocationAndReferenceConnections) {
  Graph g(OpRegistry::Global());
  {  // Scope for temporary variables used to construct g.
    GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
    Node* input = ops::SourceOp("TestInput", b.opts().WithName("in"));
    for (int i = 0; i < 10; ++i) {
      // Declare ten variable and assignment pairs.
      Node* var = ops::SourceOp("TestVariable",
                                b.opts().WithName(strings::StrCat("var_", i)));
      ops::BinaryOp("TestAssign", var, input,
                    b.opts().WithName(strings::StrCat("assign_", i)));
    }
    for (int i = 10; i < 100; ++i) {
      // Create a variable colocated with some existing variable, and
      // an assignment colocated with a possibly-different variable.
      Node* var = ops::SourceOp(
          "TestVariable",
          b.opts()
              .WithName(strings::StrCat("var_", i))
              .WithAttr("_class", {strings::StrCat("loc:@var_", i % 6)}));
      ops::BinaryOp(
          "TestAssign", var, input,
          b.opts()
              .WithName(strings::StrCat("assign_", i))
              .WithAttr("_class", {strings::StrCat("loc:@assign_", i % 3)}));
    }
    TF_EXPECT_OK(BuildGraph(b, &g));
  }

  TF_EXPECT_OK(Place(&g));
  for (int i = 0; i < 10; ++i) {
    EXPECT_COLOCATED(g, strings::StrCat("var_", i),
                     strings::StrCat("assign_", i));
  }
  for (int i = 10; i < 100; ++i) {
    EXPECT_COLOCATED(g, strings::StrCat("var_", i),
                     strings::StrCat("assign_", i));
    EXPECT_COLOCATED(g, strings::StrCat("var_", i),
                     strings::StrCat("var_", i % 6));
    EXPECT_COLOCATED(g, strings::StrCat("assign_", i),
                     strings::StrCat("assign_", i % 3));
  }
}
1452
1453 // Test that placement fails when no devices are registered.
TEST_F(PlacerTest,TestEmptyDeviceSet)1454 TEST_F(PlacerTest, TestEmptyDeviceSet) {
1455 Graph g(OpRegistry::Global());
1456 { // Scope for temporary variables used to construct g.
1457 GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
1458 ops::SourceOp("TestInput", b.opts().WithName("in"));
1459 TF_EXPECT_OK(BuildGraph(b, &g));
1460 }
1461
1462 DeviceSet empty;
1463
1464 Status s = Place(&g, &empty);
1465 EXPECT_TRUE(
1466 absl::StrContains(s.error_message(), "No devices are registered"));
1467 }
1468
1469 // Test that placement fails when the requested device forces an
1470 // indirect constraint to be violated.
TEST_F(PlacerTest, TestHeterogeneousDeviceSetFailure) {
  Graph graph(OpRegistry::Global());
  {  // Scope for temporary variables used to construct the graph.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    // The variable has only a GPU kernel, while the assign is pinned to
    // task 1, which (below) only has a CPU device.
    Node* input = ops::SourceOp("TestInput", builder.opts().WithName("in"));
    Node* variable =
        ops::SourceOp("VariableGPU", builder.opts().WithName("var"));
    ops::BinaryOp(
        "TestAssign", variable, input,
        builder.opts().WithName("assign").WithDevice("/job:b/task:1"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  // One FakeGPU on task 0 and one FakeCPU on task 1.
  std::unique_ptr<Device> gpu(
      FakeDevice::MakeGPU("/job:b/replica:0/task:0/device:FakeGPU:0"));
  std::unique_ptr<Device> cpu(
      FakeDevice::MakeCPU("/job:b/replica:0/task:1/device:FakeCPU:0"));
  DeviceSet heterogeneous;
  heterogeneous.AddDevice(gpu.get());
  heterogeneous.AddDevice(cpu.get());

  Status status = Place(&graph, &heterogeneous);
  EXPECT_EQ(error::INVALID_ARGUMENT, status.code());
  EXPECT_TRUE(absl::StrContains(status.error_message(),
                                "colocated with a group of nodes that required "
                                "incompatible device"));

  // The error message should contain information that indicates which
  // op types have which registered device types.
  EXPECT_TRUE(absl::StrContains(status.error_message(), "VariableGPU: FakeGPU"))
      << status;
  EXPECT_TRUE(
      absl::StrContains(status.error_message(), "TestAssign: FakeGPU FakeCPU"))
      << status;
}
1503
1504 // Test that placement fails when an unknown device is requested.
TEST_F(PlacerTest, TestUnknownDevice) {
  Graph graph(OpRegistry::Global());
  {  // Scope for temporary variables used to construct the graph.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    ops::SourceOp("TestInput",
                  builder.opts().WithName("in").WithDevice("/job:foo"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  // No device in job "foo" exists, so placement fails, and the error
  // should name the offending device string.
  Status status = Place(&graph);
  EXPECT_EQ(error::INVALID_ARGUMENT, status.code());
  EXPECT_TRUE(absl::StrContains(status.error_message(), "/job:foo"));
}
1517
1518 // Test that placement fails when the combination of partial
1519 // constraints leads to an unknown device.
TEST_F(PlacerTest, TestUnknownMergedDevice) {
  Graph graph(OpRegistry::Global());
  {  // Scope for temporary variables used to construct the graph.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    // "/job:foo" is a partial specification that matches no real device.
    ops::SourceOp("TestInput",
                  builder.opts().WithName("in").WithDevice("/job:foo"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  Status status = Place(&graph);
  EXPECT_EQ(error::INVALID_ARGUMENT, status.code());
  EXPECT_TRUE(absl::StrContains(status.error_message(), "/job:foo"));
}
1532
1533 // Test that placement fails when the previously-assigned device for a
1534 // node is unknown.
TEST_F(PlacerTest, TestUnknownAssignedDevice) {
  Graph graph(OpRegistry::Global());
  {  // Scope for temporary variables used to construct the graph.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    ops::SourceOp("TestInput", builder.opts().WithName("in"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  // Simulate a prior placement pass having assigned a device that does
  // not exist in the current device set.
  GetNodeByName(graph, "in")->set_assigned_device_name("/job:foo");

  Status status = Place(&graph);
  EXPECT_EQ(error::INTERNAL, status.code());
  EXPECT_TRUE(absl::StrContains(
      status.error_message(),
      "Assigned device '/job:foo' does not match any device"));
}
1551
// Test that placement fails when an op with no registered kernels is
// used and no device is requested for the node.
TEST_F(PlacerTest, TestNoKernelsRegisteredWithNoRequestedDevice) {
  Graph graph(OpRegistry::Global());
  {  // Scope for temporary variables used to construct the graph.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    ops::SourceOp("VariableNoKernels", builder.opts().WithName("var"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  Status status = Place(&graph);
  EXPECT_EQ(error::INVALID_ARGUMENT, status.code());
  // The error must name the kernel-less op, the failing node, and state
  // that nothing at all is registered for the op.
  EXPECT_TRUE(absl::StrContains(status.error_message(),
                                "No OpKernel was registered to support Op "
                                "'VariableNoKernels' used by {{node var}}"));
  EXPECT_TRUE(
      absl::StrContains(status.error_message(), "<no registered kernels>"));
}
1569
1570 // Test that placement fails when an op does not have registered kernel
1571 // and the requested device has the same (job, replica, task) as the placer's
1572 // local device
TEST_F(PlacerTest, TestNoKernelsRegisteredWithRequestedDeviceLocal) {
  const string local_cpu = "/job:b/replica:0/task:0/device:FakeCPU:0";
  const string local_gpu = "/job:b/replica:0/task:0/device:FakeGPU:0";

  Graph graph(OpRegistry::Global());
  {  // Scope for temporary variables used to construct the graph.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    ops::SourceOp("VariableNoKernels", builder.opts().WithName("var"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }
  // Request a device that shares (job, replica, task) with the placer's
  // local device (the CPU passed to Place below).
  GetNodeByName(graph, "var")->set_requested_device(local_gpu);

  std::unique_ptr<Device> gpu(FakeDevice::MakeGPU(local_gpu));
  std::unique_ptr<Device> cpu(FakeDevice::MakeCPU(local_cpu));
  DeviceSet devices;
  devices.AddDevice(gpu.get());
  devices.AddDevice(cpu.get());

  Status status = Place(&graph, &devices, cpu.get(), false, false);
  EXPECT_EQ(error::INVALID_ARGUMENT, status.code());
  EXPECT_TRUE(absl::StrContains(status.error_message(),
                                "No OpKernel was registered to support Op "
                                "'VariableNoKernels' used by {{node var}}"));
  EXPECT_TRUE(
      absl::StrContains(status.error_message(), "<no registered kernels>"));
}
1597
1598 // Test that placement succeeds when an op does not have registered kernel
1599 // and the requested device has different (job, replica, task) than the placer's
1600 // local device
TEST_F(PlacerTest, TestNoKernelsRegisteredWithRequestedDeviceRemote) {
  const string local_device = "/job:b/replica:0/task:0/device:FakeCPU:0";
  const string remote_device = "/job:b/replica:0/task:1/device:FakeGPU:0";

  Graph graph(OpRegistry::Global());
  {  // Scope for temporary variables used to construct the graph.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    ops::SourceOp("VariableNoKernels", builder.opts().WithName("var"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }
  // The requested device is on a different task than the placer's local
  // device, so the missing-kernel check is deferred to the remote task.
  GetNodeByName(graph, "var")->set_requested_device(remote_device);

  std::unique_ptr<Device> gpu(FakeDevice::MakeGPU(remote_device));
  std::unique_ptr<Device> cpu(FakeDevice::MakeCPU(local_device));
  DeviceSet heterogeneous;
  heterogeneous.AddDevice(gpu.get());
  heterogeneous.AddDevice(cpu.get());

  TF_EXPECT_OK(Place(&graph, &heterogeneous, cpu.get(), false, false));
  EXPECT_DEVICE_CONTAINS(graph, "var", remote_device);
}
1621
1622 // Test that placement fails when a kernel is registered but no known
1623 // device supports it.
TEST_F(PlacerTest, TestNoDevicesRegistered) {
  Graph graph(OpRegistry::Global());
  {  // Scope for temporary variables used to construct the graph.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    ops::SourceOp("VariableGPU", builder.opts().WithName("var"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  // "VariableGPU" has a kernel, but only for FakeGPU — and the device
  // set below contains only a FakeCPU.
  std::unique_ptr<Device> cpu(
      FakeDevice::MakeCPU("/job:a/replica:0/task:0/device:FakeCPU:0"));
  DeviceSet cpu_only;
  cpu_only.AddDevice(cpu.get());

  Status status = Place(&graph, &cpu_only);
  EXPECT_EQ(error::INVALID_ARGUMENT, status.code());
  EXPECT_TRUE(absl::StrContains(status.error_message(),
                                "No OpKernel was registered to support Op "
                                "'VariableGPU' used by {{node var}}"));
  EXPECT_TRUE(absl::StrContains(status.error_message(), "device='FakeGPU'"));
}
1644
1645 // Test that placement fails when a requested device is malformed.
TEST_F(PlacerTest, TestMalformedDeviceSpecification) {
  Graph graph(OpRegistry::Global());
  {  // Scope for temporary variables used to construct the graph.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    // "/foo:bar" is not a parseable device specification.
    ops::SourceOp("TestInput",
                  builder.opts().WithName("in").WithDevice("/foo:bar"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  Status status = Place(&graph);
  EXPECT_EQ(error::INVALID_ARGUMENT, status.code());
  EXPECT_TRUE(absl::StrContains(status.error_message(),
                                "Malformed device specification '/foo:bar'"));
}
1659
1660 // Test that placement fails when a previously-assigned device is malformed.
TEST_F(PlacerTest, TestMalformedAssignedDevice) {
  Graph graph(OpRegistry::Global());
  {  // Scope for temporary variables used to construct the graph.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    ops::SourceOp("TestInput", builder.opts().WithName("in"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  // An unparseable *assigned* device is an internal error, since
  // assignment should only ever come from a previous placement pass.
  GetNodeByName(graph, "in")->set_assigned_device_name("/foo:bar");

  Status status = Place(&graph);
  EXPECT_EQ(error::INTERNAL, status.code());
  EXPECT_TRUE(absl::StrContains(status.error_message(),
                                "Malformed assigned device '/foo:bar'"));
}
1676
1677 // Test that placement fails when a device was previously assigned to
1678 // a node, but it does not uniquely identify a particular device.
TEST_F(PlacerTest, TestNonUniqueAssignedDevice) {
  Graph graph(OpRegistry::Global());
  {  // Scope for temporary variables used to construct the graph.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    ops::SourceOp("TestInput", builder.opts().WithName("in"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  // "/job:a" is a valid spec but matches more than one device, so it
  // cannot serve as an *assigned* device.
  GetNodeByName(graph, "in")->set_assigned_device_name("/job:a");

  Status status = Place(&graph);
  EXPECT_EQ(error::INTERNAL, status.code());
  EXPECT_TRUE(absl::StrContains(
      status.error_message(),
      "Assigned device '/job:a' does not match any device"));
}
1694
1695 // Test that ops request to be placed on non-existent devices will be relocated
1696 // to existing device of the same type if allow_soft_placement is set.
TEST_F(PlacerTest, TestNonexistentGpuAllowSoftPlacement) {
  Graph graph(OpRegistry::Global());
  {  // Scope for temporary variables used to construct the graph.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    // FakeGPU:11 does not exist in the test's device set.
    ops::SourceOp(
        "TestDevice",
        builder.opts().WithName("in").WithDevice("/device:FakeGPU:11"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  // Soft placement should quietly fall back to an existing FakeGPU.
  TF_EXPECT_OK(Place(&graph, true, false));
  EXPECT_DEVICE_CONTAINS(graph, "in", "/device:FakeGPU:0");
}
1709
1710 // Test that ops request to be placed on non-existent devices will fail if
1711 // allow_soft_placement is not set.
TEST_F(PlacerTest, TestNonexistentGpuNoAllowSoftPlacement) {
  Graph graph(OpRegistry::Global());
  {  // Scope for temporary variables used to construct the graph.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    ops::SourceOp(
        "TestDevice",
        builder.opts().WithName("in").WithDevice("/device:FakeGPU:11"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  // With soft placement disabled, the missing device is a hard error.
  Status status = Place(&graph, false, false);
  EXPECT_EQ(error::INVALID_ARGUMENT, status.code());
  EXPECT_TRUE(absl::StrContains(status.error_message(), "/device:FakeGPU:11"));
}
1725
1726 // Test that the "Cannot assign a device" error message contains a format tag
1727 // when requested.
TEST_F(PlacerTest, TestNonexistentGpuNoAllowSoftPlacementFormatTag) {
  Graph graph(OpRegistry::Global());
  {  // Scope for temporary variables used to construct the graph.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    ops::SourceOp(
        "TestDevice",
        builder.opts().WithName("in").WithDevice("/device:FakeGPU:11"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  Status status = Place(&graph, false, false);
  EXPECT_EQ(error::INVALID_ARGUMENT, status.code());
  LOG(WARNING) << status.error_message();
  // The message must carry both the human-readable text and the
  // "{{node ...}}" format tag used for error rewriting.
  EXPECT_TRUE(absl::StrContains(status.error_message(),
                                "Cannot assign a device for operation in"));
  EXPECT_TRUE(absl::StrContains(status.error_message(), "{{node in}}"));
}
1744
// Test that placement fails when a node requests an explicit device that is not
// supported by the registered kernels if allow_soft_placement is not set.
TEST_F(PlacerTest, TestUnsupportedDeviceNoAllowSoftPlacement) {
  Graph graph(OpRegistry::Global());
  {  // Scope for temporary variables used to construct the graph.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    // "VariableGPU" has no FakeCPU kernel, yet a FakeCPU is requested.
    ops::SourceOp(
        "VariableGPU",
        builder.opts().WithName("var").WithDevice("/device:FakeCPU:0"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  Status status = Place(&graph, false, false);
  EXPECT_EQ(error::INVALID_ARGUMENT, status.code()) << status.ToString();
  EXPECT_TRUE(absl::StrContains(status.error_message(), "/device:FakeCPU:0"))
      << status.ToString();
  EXPECT_TRUE(
      absl::StrContains(status.error_message(),
                        "no supported kernel for FakeCPU devices is available"))
      << status.ToString();
}
1765
// Test that placement fails when a node requests an explicit device that does
// not exist among the available devices, if allow_soft_placement is not set.
TEST_F(PlacerTest, TestNonExistentDevice) {
  Graph graph(OpRegistry::Global());
  {  // Scope for temporary variables used to construct the graph.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    // No device in "/job:foo/replica:17" exists in the test device set.
    ops::SourceOp(
        "VariableGPU",
        builder.opts().WithName("var").WithDevice("/job:foo/replica:17"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  Status status = Place(&graph, false, false);
  EXPECT_EQ(error::INVALID_ARGUMENT, status.code());
  LOG(WARNING) << status.error_message();
  EXPECT_TRUE(absl::StrContains(
      status.error_message(),
      "was explicitly assigned to /job:foo/replica:17"));
  EXPECT_TRUE(
      absl::StrContains(status.error_message(), "but available devices"));
}
1784
1785 #if !(GOOGLE_CUDA || TENSORFLOW_USE_ROCM)
1786 // Test that we inform the user if they appear to be explicitly placing nodes
1787 // on a GPU when CUDA is not available
TEST_F(PlacerTest, TestUseGpuWithNoCuda) {
  Graph graph(OpRegistry::Global());
  {  // Scope for temporary variables used to construct the graph.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    // An explicit "/device:gpu:0" request in a CUDA-less build.
    ops::SourceOp("VariableGPU",
                  builder.opts().WithName("var").WithDevice("/device:gpu:0"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  Status status = Place(&graph, false, false);
  EXPECT_EQ(error::INVALID_ARGUMENT, status.code());
  LOG(WARNING) << status.error_message();
  EXPECT_TRUE(absl::StrContains(
      status.error_message(),
      "The requested device appears to be a GPU, but CUDA is not enabled."));
}
1804 #endif
1805
// Test that soft placement can relocate a node whose requested device type
// has no kernel for the op.
TEST_F(PlacerTest, TestUnsupportedDeviceAllowSoftPlacement) {
  Graph graph(OpRegistry::Global());
  {  // Scope for temporary variables used to construct the graph.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    ops::SourceOp("TestInput",  // has only CPU kernel
                  builder.opts().WithName("a").WithDevice("/device:FakeGPU:0"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  TF_EXPECT_OK(Place(&graph, true, false));
}
1817
1818 // Test that a graph with device type and reference constraints on
1819 // some of the ops will successfully assign nodes to the constrained
1820 // device, and colocate nodes with reference connections.
TEST_F(PlacerTest, TestDeviceTypeConstraintsAllowSoftPlacement) {
  Graph graph(OpRegistry::Global());
  {  // Scope for temporary variables used to construct the graph.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    // "var_gpu" produces a ref and runs only on GPU; "force_gpu" takes
    // that ref but requests CPU. Both should be placed on GPU.
    Node* gpu_var =
        ops::SourceOp("VariableGPU", builder.opts().WithName("var_gpu"));
    ops::UnaryOp(
        "TestDeviceEnforce", gpu_var,
        builder.opts().WithName("force_gpu").WithDevice("/device:FakeCPU:0"));
    // Symmetric case: "var_cpu" produces a ref and runs only on CPU;
    // "force_cpu" takes that ref but requests GPU. Both end up on CPU.
    Node* cpu_var =
        ops::SourceOp("VariableCPU", builder.opts().WithName("var_cpu"));
    ops::UnaryOp(
        "TestDeviceEnforce", cpu_var,
        builder.opts().WithName("force_cpu").WithDevice("/device:FakeGPU:0"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  TF_EXPECT_OK(Place(&graph, true, false));
  EXPECT_DEVICE_TYPE(graph, "var_gpu", "FakeGPU");
  EXPECT_DEVICE_TYPE(graph, "force_gpu", "FakeGPU");
  EXPECT_COLOCATED(graph, "var_gpu", "force_gpu");
  EXPECT_DEVICE_TYPE(graph, "var_cpu", "FakeCPU");
  EXPECT_DEVICE_TYPE(graph, "force_cpu", "FakeCPU");
  EXPECT_COLOCATED(graph, "var_cpu", "force_cpu");
}
1850
1851 // Test that placement fails when two nodes have a reference connection
1852 // constraint, and each node requires a mutually incompatible device.
TEST_F(PlacerTest, TestUnsatisfiableConstraintWithReferenceConnections) {
  Graph graph(OpRegistry::Global());
  {  // Scope for temporary variables used to construct the graph.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
    // "var" runs only on GPU while "AssignCPU" runs only on CPU, so the
    // reference edge between them cannot be satisfied.
    Node* variable =
        ops::SourceOp("VariableGPU", builder.opts().WithName("var"));
    Node* input = ops::SourceOp("TestInput", builder.opts().WithName("in"));
    ops::BinaryOp("AssignCPU", variable, input,
                  builder.opts().WithName("assign"));
    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  Status status = Place(&graph);
  EXPECT_EQ(error::INVALID_ARGUMENT, status.code());
  EXPECT_TRUE(absl::StrContains(status.error_message(),
                                "Cannot colocate nodes {{colocation_node "
                                "var}} and {{colocation_node assign}}"));
}
1869
1870 // Test that a generator node follows its consumers (where there are several
1871 // consumer nodes on the same devices).
TEST_F(PlacerTest, TestGeneratorNodeFollowsConsumerNode) {
  Graph graph(OpRegistry::Global());
  {  // Scope for temporary variables used to construct the graph.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);

    // Two variables that exist only on CPU.
    Node* first_var =
        ops::SourceOp("VariableCPU", builder.opts().WithName("var1_cpu"));
    Node* second_var =
        ops::SourceOp("VariableCPU", builder.opts().WithName("var2_cpu"));

    // The generator could run on either device type. Since all of its
    // consumers end up on CPU, the heuristic keeps it on CPU to avoid a
    // cross-device copy.
    Node* generator =
        ops::SourceOp("TestCPUGPUOutput", builder.opts().WithName("in"));

    // Reference edges bind both assigns to the CPU variables.
    ops::BinaryOp("TestAssign", first_var, generator,
                  builder.opts().WithName("assign1"));
    ops::BinaryOp("TestAssign", second_var, generator,
                  builder.opts().WithName("assign2"));

    TF_EXPECT_OK(BuildGraph(builder, &graph));
  }

  TF_EXPECT_OK(Place(&graph));
  EXPECT_COLOCATED(graph, "var1_cpu", "in");
  EXPECT_COLOCATED(graph, "assign1", "in");
  EXPECT_COLOCATED(graph, "var2_cpu", "in");
  EXPECT_COLOCATED(graph, "assign2", "in");
}
1902
1903 // Test that a generator node does not follow its consumers (where there are
1904 // several consumers on different devices).
TEST_F(PlacerTest, TestGeneratorNodeDoesntFollowNonColocatedConsumers) {
  Graph graph(OpRegistry::Global());
  {  // Scope for temporary variables used to construct the graph.
    GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);

    // Two variables that exist only on CPU.
    Node* first_var =
        ops::SourceOp("VariableCPU", builder.opts().WithName("var1_cpu"));
    Node* second_var =
        ops::SourceOp("VariableCPU", builder.opts().WithName("var2_cpu"));

    // The generator could run on either device type. Its consumers are
    // pinned (below) to two *different* CPU devices, so the generator
    // cannot be colocated with both and falls back to its default
    // placement, which prefers the GPU.
    Node* generator =
        ops::SourceOp("TestCPUGPUOutput", builder.opts().WithName("in"));

    // Reference edges bind both assigns to the CPU variables.
    ops::BinaryOp("TestAssign", first_var, generator,
                  builder.opts().WithName("assign1"));
    ops::BinaryOp("TestAssign", second_var, generator,
                  builder.opts().WithName("assign2"));

    TF_EXPECT_OK(BuildGraph(builder, &graph));

    GetNodeByName(graph, "var1_cpu")
        ->set_assigned_device_name("/job:a/replica:0/task:0/device:FakeCPU:1");

    GetNodeByName(graph, "var2_cpu")
        ->set_assigned_device_name("/job:a/replica:0/task:0/device:FakeCPU:2");
  }

  TF_EXPECT_OK(Place(&graph));
  EXPECT_COLOCATED(graph, "assign1", "var1_cpu");
  EXPECT_COLOCATED(graph, "assign2", "var2_cpu");
  EXPECT_DEVICE_TYPE(graph, "in", "FakeGPU");
}
1940
// Register dummy kernels, on both fake device types, for the standard ops
// used by the GraphDef-based tests below (_Arg/_Retval function boundaries,
// Identity/Const/Mul/Add bodies, and PartitionedCall for function calls).
REGISTER_KERNEL_BUILDER(Name("_Arg").Device("FakeCPU"), DummyOp);
REGISTER_KERNEL_BUILDER(Name("_Arg").Device("FakeGPU"), DummyOp);
REGISTER_KERNEL_BUILDER(Name("_Retval").Device("FakeCPU"), DummyOp);
REGISTER_KERNEL_BUILDER(Name("_Retval").Device("FakeGPU"), DummyOp);
REGISTER_KERNEL_BUILDER(Name("Identity").Device("FakeCPU"), DummyOp);
REGISTER_KERNEL_BUILDER(Name("Identity").Device("FakeGPU"), DummyOp);
REGISTER_KERNEL_BUILDER(Name("Const").Device("FakeCPU"), DummyOp);
REGISTER_KERNEL_BUILDER(Name("Const").Device("FakeGPU"), DummyOp);
REGISTER_KERNEL_BUILDER(Name("Mul").Device("FakeCPU"), DummyOp);
REGISTER_KERNEL_BUILDER(Name("Mul").Device("FakeGPU"), DummyOp);
REGISTER_KERNEL_BUILDER(Name("Add").Device("FakeCPU"), DummyOp);
REGISTER_KERNEL_BUILDER(Name("Add").Device("FakeGPU"), DummyOp);
REGISTER_KERNEL_BUILDER(Name("PartitionedCall").Device("FakeCPU"), DummyOp);
REGISTER_KERNEL_BUILDER(Name("PartitionedCall").Device("FakeGPU"), DummyOp);
1955
// A *requested* device on a resource-generating node ("a", "b") is treated
// like an *assigned* device: with soft placement the colocation hint on id1
// is dropped so each Identity follows its own resource; without it, the
// colocation of id1 (GPU via a) with id2 (CPU via b) is a hard error.
TEST_P(SoftPlacementPlacerTest,
       RequestedDeviceOnResourceGeneratorIsTreatedAsAssigned) {
  /*
   *    a:RES:GPU   b:RES:CPU
   *    |           |
   *    |           |
   *    v           v
   *    id1         id2
   *    @loc:id2
   */
  FunctionDef func = test::function::ResourceOutput();
  GraphDef graph = GDef(
      {
          NDef("a", "_Arg", {}, {{"T", DT_RESOURCE}}, kGPU),
          NDef("b", "_Arg", {}, {{"T", DT_RESOURCE}}, kCPU),
          // id1 carries a "_class" colocation constraint tying it to id2.
          NDef("id1", "Identity", {"a"},
               {{"T", DT_RESOURCE},
                {"_class", gtl::ArraySlice<string>({"loc:@id2"})}}),
          NDef("id2", "Identity", {"b"}, {{"T", DT_RESOURCE}}),
      },
      // FunctionLib
      {func});

  Graph g(OpRegistry::Global());
  TF_ASSERT_OK(BuildGraph(graph, &g));

  bool allow_soft_placement = GetParam();
  Status s = Place(&g, allow_soft_placement, true);
  if (allow_soft_placement) {
    // Soft placement: each Identity stays with the resource it consumes.
    EXPECT_EQ(error::OK, s.code()) << s.ToString();
    EXPECT_DEVICE_TYPE(g, "a", "FakeGPU");
    EXPECT_DEVICE_TYPE(g, "id1", "FakeGPU");
    EXPECT_DEVICE_TYPE(g, "b", "FakeCPU");
    EXPECT_DEVICE_TYPE(g, "id2", "FakeCPU");
  } else {
    // Hard placement: the colocation group mixes CPU and GPU and fails.
    EXPECT_EQ(error::INVALID_ARGUMENT, s.code());
    EXPECT_TRUE(absl::StrContains(
        s.error_message(),
        "Cannot colocate nodes {{colocation_node id2}} and {{colocation_node "
        "id1}}: Cannot merge devices with incompatible types: "
        "'/device:FakeCPU:0' and '/device:FakeGPU:0'"))
        << s.ToString();
  }
}
2000
// Devices requested on intermediate Identity nodes (not on the resource
// generators themselves) can be overridden by the placer: the whole chain
// ends up colocated rather than failing on the GPU/CPU mismatch.
TEST_F(PlacerTest, RequestedDeviceCanBeOverridden) {
  /*
   *     a:RES      b:RES
   *       |          |
   *     id_a:GPU   id_b:CPU
   *       |          |
   *       v          v
   *      id1        id2
   *     @loc:id2
   */
  FunctionDef func = test::function::ResourceOutput();
  GraphDef graph = GDef(
      {
          NDef("a", "_Arg", {}, {{"T", DT_RESOURCE}}),
          NDef("b", "_Arg", {}, {{"T", DT_RESOURCE}}),
          NDef("id_a", "Identity", {"a"}, {{"T", DT_RESOURCE}}, kGPU),
          NDef("id_b", "Identity", {"b"}, {{"T", DT_RESOURCE}}, kCPU),
          // id1 is colocated with id2 via the "_class" attribute.
          NDef("id1", "Identity", {"id_a"},
               {{"T", DT_RESOURCE},
                {"_class", gtl::ArraySlice<string>({"loc:@id2"})}}),
          NDef("id2", "Identity", {"id_b"}, {{"T", DT_RESOURCE}}),
      },
      // FunctionLib
      {func});

  Graph g(OpRegistry::Global());
  TF_ASSERT_OK(BuildGraph(graph, &g));
  TF_ASSERT_OK(Place(&g));

  // All should be colocated
  EXPECT_COLOCATED(g, "a", "b");
  EXPECT_COLOCATED(g, "id_a", "id_b");
  EXPECT_COLOCATED(g, "id1", "id2");
  EXPECT_COLOCATED(g, "a", "id_a");
  EXPECT_COLOCATED(g, "a", "id1");
}
2037
// An already-*assigned* device must not be overridden: "a" is pinned to the
// CPU, but its consumer "iter" has only a GPU kernel, so placement fails
// instead of moving "a".
TEST_F(PlacerTest, AssignedDeviceOfColocatedNodeIsRespected) {
  /*
   *     a:float (assigned to CPU)
   *        |
   *        v
   *      iter (has only GPU kernel)
   */
  GraphDef graph = GDef({
      NDef("a", "_Arg", {}, {{"T", DT_RESOURCE}}),
      NDef("iter", "IteratorGPU", {"a"}),
  });

  Graph g(OpRegistry::Global());
  TF_ASSERT_OK(BuildGraph(graph, &g));
  // Simulate a previous placement pass having committed "a" to the CPU.
  GetNodeByName(g, "a")->set_assigned_device_name(kFullCPU);
  Status s = Place(&g);
  EXPECT_EQ(error::INVALID_ARGUMENT, s.code()) << s.ToString();
  EXPECT_TRUE(
      absl::StrContains(s.error_message(),
                        "{{colocation_node iter}} was colocated with a "
                        "group of nodes that required incompatible device "
                        "'/job:a/replica:0/task:0/device:FakeCPU:0'"))
      << s.ToString();
}
2062
// Same topology as RequestedDeviceCanBeOverridden, but here id_a/id_b carry
// *assigned* devices (set directly on the nodes), which the placer must not
// override: soft placement drops the colocation hint instead; hard placement
// fails on the CPU/GPU conflict.
TEST_P(SoftPlacementPlacerTest,
       AssignedDevicesAreNotOverriddenDueToResourcesAndColocation) {
  /*
   *     a:RES      b:RES
   *       |          |
   *     id_a:GPU   id_b:CPU
   *       |          |
   *       v          v
   *      id1        id2
   *     @loc:id2
   */
  FunctionDef func = test::function::ResourceOutput();
  GraphDef graph = GDef(
      {
          NDef("a", "_Arg", {}, {{"T", DT_RESOURCE}}),
          NDef("b", "_Arg", {}, {{"T", DT_RESOURCE}}),
          NDef("id_a", "Identity", {"a"}, {{"T", DT_RESOURCE}}),
          NDef("id_b", "Identity", {"b"}, {{"T", DT_RESOURCE}}),
          // id1 is colocated with id2 via the "_class" attribute.
          NDef("id1", "Identity", {"id_a"},
               {{"T", DT_RESOURCE},
                {"_class", gtl::ArraySlice<string>({"loc:@id2"})}}),
          NDef("id2", "Identity", {"id_b"}, {{"T", DT_RESOURCE}}),
      },
      // FunctionLib
      {func});

  Graph g(OpRegistry::Global());
  TF_ASSERT_OK(BuildGraph(graph, &g));
  // Pin the two Identities to conflicting devices via *assignment*.
  GetNodeByName(g, "id_a")->set_assigned_device_name(kFullGPU);
  GetNodeByName(g, "id_b")->set_assigned_device_name(kFullCPU);

  bool allow_soft_placement = GetParam();

  Status s = Place(&g, allow_soft_placement, false);
  if (allow_soft_placement) {
    // Soft placement: each chain keeps its assigned device.
    EXPECT_EQ(error::OK, s.code()) << s.ToString();
    EXPECT_DEVICE_TYPE(g, "a", "FakeGPU");
    EXPECT_DEVICE_TYPE(g, "id_a", "FakeGPU");
    EXPECT_DEVICE_TYPE(g, "id1", "FakeGPU");
    EXPECT_DEVICE_TYPE(g, "b", "FakeCPU");
    EXPECT_DEVICE_TYPE(g, "id_b", "FakeCPU");
    EXPECT_DEVICE_TYPE(g, "id2", "FakeCPU");
  } else {
    // Hard placement: the colocation group cannot be merged.
    EXPECT_EQ(error::INVALID_ARGUMENT, s.code());
    EXPECT_TRUE(absl::StrContains(
        s.error_message(),
        "Cannot colocate nodes {{colocation_node id2}} and {{colocation_node "
        "id1}}: Cannot merge devices with incompatible types: "
        "'/job:a/replica:0/task:0/device:FakeCPU:0' and "
        "'/job:a/replica:0/task:0/device:FakeGPU:0'"))
        << s.ToString();
  }
}
2116
2117 // Fixture for tests that place graphs containing function calls.
2118 // Particularly the case where internal functions return resources.
class NestedPlacerTest : public PlacerTest {
 public:
  // Create one FakeCPU and one FakeGPU. These tests don't need multiple devices
  // of the same type. (PlacerTest's int parameter is the per-type device
  // count — assumed from this usage; TODO confirm against the fixture.)
  NestedPlacerTest() : PlacerTest(1) {}
};
2125
TEST_F(NestedPlacerTest, OutputOneResource) {
  /*
   *                          a:FLOAT:GPU
   *                             |    b:RESOURCE:CPU
   *                             |      |
   *                             v      v
   *                               PCO
   *                              |   \
   *                              |    v
   *                              v   r2:FLOAT
   *                        r1:RESOURCE
   *
   * PartitionedCallOp (PCO) should be placed on GPU even though it
   * takes a CPU resource as input. The resource output should be placed
   * on CPU since it is the same resource as the input one.
   */
  FunctionDef func = test::function::ResourceOutput();
  GraphDef graph = GDef(
      {
          NDef("a", "_Arg", {}, {{"T", DT_FLOAT}}, kGPU),
          NDef("b", "_Arg", {}, {{"T", DT_RESOURCE}}, kCPU),
          // The call forwards the resource (output 0) and a float (output 1).
          NDef("y", "PartitionedCall", {"a", "b"},
               {{"Tin", DataTypeSlice{DT_FLOAT, DT_RESOURCE}},
                {"Tout", DataTypeSlice{DT_RESOURCE, DT_FLOAT}},
                {"f", FDH::FunctionRef("ResourceOutput", {})}}),
          NDef("r1", "Identity", {"y:0"}, {{"T", DT_RESOURCE}}),
          NDef("r2", "Identity", {"y:1"}, {{"T", DT_FLOAT}}),
      },
      // FunctionLib
      {func});

  Graph g(OpRegistry::Global());
  TF_ASSERT_OK(BuildGraph(graph, &g));
  TF_ASSERT_OK(CallOptPassesAndPlace(&g));

  // r1 follows the resource back to b's CPU; the call and r2 stay on GPU.
  EXPECT_DEVICE_TYPE(g, "y", "FakeGPU");
  EXPECT_DEVICE_TYPE(g, "r1", "FakeCPU");
  EXPECT_DEVICE_TYPE(g, "r2", "FakeGPU");
}
2165
TEST_F(NestedPlacerTest, OutputOneResource_ExtraIdentities) {
  /*
   *                          a:FLOAT
   *                             |    b:RESOURCE
   *                             |      |
   *                           ai:GPU   |
   *                             |    bi:CPU
   *                             |      |
   *                             v      v
   *                               PCO
   *                              |   \
   *                              |    v
   *                              v   r2:FLOAT
   *                        r1:RESOURCE
   *
   * Same as above except that devices are requested on identities, not on
   * resource generating ops.
   */
  FunctionDef func = test::function::ResourceOutput();
  GraphDef graph = GDef(
      {
          NDef("a", "_Arg", {}, {{"T", DT_FLOAT}}, kGPU),
          NDef("b", "_Arg", {}, {{"T", DT_RESOURCE}}, kCPU),
          NDef("ai", "Identity", {"a"}, {{"T", DT_FLOAT}}),
          NDef("bi", "Identity", {"b"}, {{"T", DT_RESOURCE}}),
          NDef("y", "PartitionedCall", {"ai", "bi"},
               {{"Tin", DataTypeSlice{DT_FLOAT, DT_RESOURCE}},
                {"Tout", DataTypeSlice{DT_RESOURCE, DT_FLOAT}},
                {"f", FDH::FunctionRef("ResourceOutput", {})}}),
          NDef("r1", "Identity", {"y:0"}, {{"T", DT_RESOURCE}}),
          NDef("r2", "Identity", {"y:1"}, {{"T", DT_FLOAT}}),
      },
      // FunctionLib
      {func});

  Graph g(OpRegistry::Global());
  TF_ASSERT_OK(BuildGraph(graph, &g));
  TF_ASSERT_OK(CallOptPassesAndPlace(&g));

  // Each identity stays with its producer; the resource output r1 still
  // tracks the CPU resource while the call and r2 land on GPU.
  EXPECT_DEVICE_TYPE(g, "a", "FakeGPU");
  EXPECT_DEVICE_TYPE(g, "b", "FakeCPU");
  EXPECT_DEVICE_TYPE(g, "ai", "FakeGPU");
  EXPECT_DEVICE_TYPE(g, "bi", "FakeCPU");
  EXPECT_DEVICE_TYPE(g, "y", "FakeGPU");
  EXPECT_DEVICE_TYPE(g, "r1", "FakeCPU");
  EXPECT_DEVICE_TYPE(g, "r2", "FakeGPU");
}
2213
TEST_F(NestedPlacerTest, OutputOneResource_OverrideOutputResourceDevice) {
  /*
   *                          a:FLOAT:GPU
   *                             |    b:RESOURCE:CPU
   *                             |      |
   *                             v      v
   *                               PCO
   *                              |   \
   *                              |    v
   *                              v   r2:FLOAT
   *                        r1:RESOURCE:GPU
   *
   * Same as above except r1 is wrongly assigned on GPU. Placer will override
   * this device assignment.
   */
  FunctionDef func = test::function::ResourceOutput();
  GraphDef graph = GDef(
      {
          NDef("a", "_Arg", {}, {{"T", DT_FLOAT}}, kGPU),
          NDef("b", "_Arg", {}, {{"T", DT_RESOURCE}}, kCPU),
          NDef("y", "PartitionedCall", {"a", "b"},
               {{"Tin", DataTypeSlice{DT_FLOAT, DT_RESOURCE}},
                {"Tout", DataTypeSlice{DT_RESOURCE, DT_FLOAT}},
                {"f", FDH::FunctionRef("ResourceOutput", {})}}),
          // r1's kGPU request conflicts with the CPU resource it consumes.
          NDef("r1", "Identity", {"y:0"}, {{"T", DT_RESOURCE}}, kGPU),
          NDef("r2", "Identity", {"y:1"}, {{"T", DT_FLOAT}}),
      },
      // FunctionLib
      {func});

  Graph g(OpRegistry::Global());
  TF_ASSERT_OK(BuildGraph(graph, &g));
  TF_ASSERT_OK(CallOptPassesAndPlace(&g, false, true));

  // The bad request on r1 is overridden: it follows the resource to CPU.
  EXPECT_DEVICE_TYPE(g, "y", "FakeGPU");
  EXPECT_DEVICE_TYPE(g, "r1", "FakeCPU");
  EXPECT_DEVICE_TYPE(g, "r2", "FakeGPU");
}
2252
TEST_F(NestedPlacerTest, OutputTwoResources) {
  /*
   *                a:RESOURCE:CPU
   *                |   b:RESOURCE:GPU
   *                |   |
   *                v   v
   *                PCO (simple swap)
   *                |   \
   *                |    v
   *                v     r2:RESOURCE
   *                r1:RESOURCE
   *
   * Ops consuming output resources should be placed on correct devices.
   */
  FunctionDef func = test::function::Swap();
  GraphDef graph = GDef(
      {
          NDef("a", "_Arg", {}, {{"T", DT_RESOURCE}}, kCPU),
          NDef("b", "_Arg", {}, {{"T", DT_RESOURCE}}, kGPU),
          NDef("y", "PartitionedCall", {"a", "b"},
               {{"Tin", DataTypeSlice{DT_RESOURCE, DT_RESOURCE}},
                {"Tout", DataTypeSlice{DT_RESOURCE, DT_RESOURCE}},
                {"f", FDH::FunctionRef("Swap", {{"T", DT_RESOURCE}})}}),
          NDef("r1", "Identity", {"y:0"}, {{"T", DT_RESOURCE}}),
          NDef("r2", "Identity", {"y:1"}, {{"T", DT_RESOURCE}}),
      },
      // FunctionLib
      {func});

  Graph g(OpRegistry::Global());
  // ASSERT (not EXPECT): if construction or placement fails, the device
  // checks below would run on a half-built graph and produce noise.
  TF_ASSERT_OK(BuildGraph(graph, &g));
  TF_ASSERT_OK(CallOptPassesAndPlace(&g));

  // Swap crosses the resources over: r1 follows `b` (GPU), r2 follows `a`
  // (CPU).
  EXPECT_DEVICE_TYPE(g, "y", "FakeGPU");
  EXPECT_DEVICE_TYPE(g, "r1", "FakeGPU");
  EXPECT_DEVICE_TYPE(g, "r2", "FakeCPU");
}
2290
TEST_F(NestedPlacerTest, OutputTwoResources_PCOOnCPU) {
  /*
   *                a:RESOURCE:CPU
   *                |   b:RESOURCE:GPU
   *                |   |
   *                v   v
   *                PCO:CPU (simple swap)
   *                |   \
   *                |    v
   *                v     r2:RESOURCE
   *                r1:RESOURCE
   *
   * Ops consuming output resources should be placed on correct devices, even
   * when PCO is explicitly placed.
   */
  FunctionDef func = test::function::Swap();
  GraphDef graph = GDef(
      {
          NDef("a", "_Arg", {}, {{"T", DT_RESOURCE}}, kCPU),
          NDef("b", "_Arg", {}, {{"T", DT_RESOURCE}}, kGPU),
          NDef("y", "PartitionedCall", {"a", "b"},
               {{"Tin", DataTypeSlice{DT_RESOURCE, DT_RESOURCE}},
                {"Tout", DataTypeSlice{DT_RESOURCE, DT_RESOURCE}},
                {"f", FDH::FunctionRef("Swap", {{"T", DT_RESOURCE}})}},
               kCPU),
          NDef("r1", "Identity", {"y:0"}, {{"T", DT_RESOURCE}}),
          NDef("r2", "Identity", {"y:1"}, {{"T", DT_RESOURCE}}),
      },
      // FunctionLib
      {func});

  Graph g(OpRegistry::Global());
  // ASSERT (not EXPECT): stop immediately if setup fails; the placement
  // expectations below are meaningless on a half-built graph.
  TF_ASSERT_OK(BuildGraph(graph, &g));
  TF_ASSERT_OK(CallOptPassesAndPlace(&g));

  // The call itself honors its explicit CPU placement, while the outputs
  // still follow the swapped resources.
  EXPECT_DEVICE_TYPE(g, "y", "FakeCPU");
  EXPECT_DEVICE_TYPE(g, "r1", "FakeGPU");
  EXPECT_DEVICE_TYPE(g, "r2", "FakeCPU");
}
2330
TEST_F(NestedPlacerTest, OutputTwoResources_UnassignedResource) {
  /*
   *                a:RESOURCE
   *                |   b:RESOURCE:GPU
   *                |   |
   *                v   v
   *                PCO:CPU (simple swap)
   *                |   \
   *                |    v
   *                v     r2:RESOURCE
   *                r1:RESOURCE
   *
   * Resource input `a` is not explicitly assigned. Placer leaves `a` and `b` to
   * the "second pass" as they are "sources". It assigns `r1` to GPU because it
   * is in the same group as `b`. It assigns `r2` to GPU because GPU has a
   * higher device preference. Finally, `a` is assigned to GPU because `r2` is
   * on GPU - this tests that the "second pass" heuristics respect colocation
   * groups (even when the consumer of the source, i.e. PCO is on a different
   * device).
   */
  FunctionDef func = test::function::Swap();
  GraphDef graph = GDef(
      {
          NDef("a", "_Arg", {}, {{"T", DT_RESOURCE}}),
          NDef("b", "_Arg", {}, {{"T", DT_RESOURCE}}, kGPU),
          NDef("y", "PartitionedCall", {"a", "b"},
               {{"Tin", DataTypeSlice{DT_RESOURCE, DT_RESOURCE}},
                {"Tout", DataTypeSlice{DT_RESOURCE, DT_RESOURCE}},
                {"f", FDH::FunctionRef("Swap", {{"T", DT_RESOURCE}})}},
               kCPU),
          NDef("r1", "Identity", {"y:0"}, {{"T", DT_RESOURCE}}),
          NDef("r2", "Identity", {"y:1"}, {{"T", DT_RESOURCE}}),
      },
      // FunctionLib
      {func});

  Graph g(OpRegistry::Global());
  // ASSERT (not EXPECT): if BuildGraph fails, placing and inspecting the
  // graph below would only produce cascading failures.
  TF_ASSERT_OK(BuildGraph(graph, &g));
  TF_ASSERT_OK(CallOptPassesAndPlace(&g, false, true));

  EXPECT_DEVICE_TYPE(g, "a", "FakeGPU");
  EXPECT_DEVICE_TYPE(g, "b", "FakeGPU");
  EXPECT_DEVICE_TYPE(g, "y", "FakeCPU");
  EXPECT_DEVICE_TYPE(g, "r1", "FakeGPU");
  EXPECT_DEVICE_TYPE(g, "r2", "FakeGPU");
}
2377
TEST_F(NestedPlacerTest, OutputTwoResources_UnassignedResource_CPU) {
  /*
   *                a:RESOURCE
   *                |   b:RESOURCE:CPU
   *                |   |
   *                v   v
   *                PCO:CPU (simple swap)
   *                |   \
   *                |    v
   *                v     r2:RESOURCE
   *                r1:RESOURCE
   *
   * Same as above except `b` is on CPU.
   */
  FunctionDef func = test::function::Swap();
  GraphDef graph = GDef(
      {
          NDef("a", "_Arg", {}, {{"T", DT_RESOURCE}}),
          NDef("b", "_Arg", {}, {{"T", DT_RESOURCE}}, kCPU),
          NDef("y", "PartitionedCall", {"a", "b"},
               {{"Tin", DataTypeSlice{DT_RESOURCE, DT_RESOURCE}},
                {"Tout", DataTypeSlice{DT_RESOURCE, DT_RESOURCE}},
                {"f", FDH::FunctionRef("Swap", {{"T", DT_RESOURCE}})}},
               kCPU),
          NDef("r1", "Identity", {"y:0"}, {{"T", DT_RESOURCE}}),
          NDef("r2", "Identity", {"y:1"}, {{"T", DT_RESOURCE}}),
      },
      // FunctionLib
      {func});

  Graph g(OpRegistry::Global());
  // ASSERT (not EXPECT): abort on setup failure instead of checking
  // placements on a half-built graph.
  TF_ASSERT_OK(BuildGraph(graph, &g));
  TF_ASSERT_OK(CallOptPassesAndPlace(&g, false, true));

  EXPECT_DEVICE_TYPE(g, "a", "FakeGPU");
  EXPECT_DEVICE_TYPE(g, "b", "FakeCPU");
  EXPECT_DEVICE_TYPE(g, "y", "FakeCPU");
  EXPECT_DEVICE_TYPE(g, "r1", "FakeCPU");
  EXPECT_DEVICE_TYPE(g, "r2", "FakeGPU");
}
2418
TEST_F(NestedPlacerTest, OutputResourceConsumedByMultipleOps) {
  /*
   *                a:RESOURCE
   *                |   b:RESOURCE:CPU
   *                |   |
   *                v   v
   *                PCO:CPU (simple swap)
   *                |   \
   *                |    v
   *                |     r3:RESOURCE:GPU
   *                |
   *             ---+---
   *             |     |
   *             |     r2:RESOURCE
   *             r1:RESOURCE
   */
  FunctionDef func = test::function::Swap();
  GraphDef graph = GDef(
      {
          NDef("a", "_Arg", {}, {{"T", DT_RESOURCE}}),
          NDef("b", "_Arg", {}, {{"T", DT_RESOURCE}}, kCPU),
          NDef("y", "PartitionedCall", {"a", "b"},
               {{"Tin", DataTypeSlice{DT_RESOURCE, DT_RESOURCE}},
                {"Tout", DataTypeSlice{DT_RESOURCE, DT_RESOURCE}},
                {"f", FDH::FunctionRef("Swap", {{"T", DT_RESOURCE}})}}),
          // Both r1 and r2 consume the same output resource (y:0).
          NDef("r1", "Identity", {"y:0"}, {{"T", DT_RESOURCE}}),
          NDef("r2", "Identity", {"y:0"}, {{"T", DT_RESOURCE}}),
          NDef("r3", "Identity", {"y:1"}, {{"T", DT_RESOURCE}}, kGPU),
      },
      // FunctionLib
      {func});

  Graph g(OpRegistry::Global());
  // ASSERT (not EXPECT): device checks below are meaningless if the graph
  // could not be built.
  TF_ASSERT_OK(BuildGraph(graph, &g));
  TF_ASSERT_OK(CallOptPassesAndPlace(&g, false, true));

  EXPECT_DEVICE_TYPE(g, "a", "FakeGPU");
  EXPECT_DEVICE_TYPE(g, "b", "FakeCPU");
  EXPECT_DEVICE_TYPE(g, "r1", "FakeCPU");
  EXPECT_DEVICE_TYPE(g, "r2", "FakeCPU");
  EXPECT_DEVICE_TYPE(g, "r3", "FakeGPU");
}
2461
TEST_F(NestedPlacerTest, DuplicateInputResource) {
  /*
   *                a:RESOURCE
   *               /  \
   *              |    |
   *              v    v
   *              PCO:GPU (simple swap)
   *                |   \
   *                |    v
   *                v     r2:RESOURCE:CPU
   *                r1:RESOURCE
   */
  FunctionDef func = test::function::Swap();
  GraphDef graph = GDef(
      {
          NDef("a", "_Arg", {}, {{"T", DT_RESOURCE}}),
          // The same resource feeds both call inputs.
          NDef("y", "PartitionedCall", {"a", "a"},
               {{"Tin", DataTypeSlice{DT_RESOURCE, DT_RESOURCE}},
                {"Tout", DataTypeSlice{DT_RESOURCE, DT_RESOURCE}},
                {"f", FDH::FunctionRef("Swap", {{"T", DT_RESOURCE}})}},
               kGPU),
          NDef("r1", "Identity", {"y:0"}, {{"T", DT_RESOURCE}}),
          NDef("r2", "Identity", {"y:1"}, {{"T", DT_RESOURCE}}, kCPU),
      },
      // FunctionLib
      {func});

  Graph g(OpRegistry::Global());
  // ASSERT (not EXPECT): bail out on setup failure rather than checking
  // placements on a half-built graph.
  TF_ASSERT_OK(BuildGraph(graph, &g));
  TF_ASSERT_OK(CallOptPassesAndPlace(&g, false, true));

  EXPECT_DEVICE_TYPE(g, "a", "FakeCPU");
  EXPECT_DEVICE_TYPE(g, "y", "FakeGPU");
  EXPECT_DEVICE_TYPE(g, "r1", "FakeCPU");
  EXPECT_DEVICE_TYPE(g, "r2", "FakeCPU");
}
2498
TEST_F(NestedPlacerTest, DuplicateInputs_OutputResourceConsumedByMultipleOps) {
  /*
   *                a:RESOURCE
   *               /  \
   *              |    |
   *              v    v
   *              PCO:GPU (simple swap)
   *                |   \
   *                |    v
   *                |     r3:RESOURCE
   *                |
   *             ---+---
   *             |     |
   *             |     r2:RESOURCE:CPU
   *             r1:RESOURCE
   */
  FunctionDef func = test::function::Swap();
  GraphDef graph = GDef(
      {
          NDef("a", "_Arg", {}, {{"T", DT_RESOURCE}}),
          // The same resource feeds both call inputs; r1 and r2 both consume
          // output y:0.
          NDef("y", "PartitionedCall", {"a", "a"},
               {{"Tin", DataTypeSlice{DT_RESOURCE, DT_RESOURCE}},
                {"Tout", DataTypeSlice{DT_RESOURCE, DT_RESOURCE}},
                {"f", FDH::FunctionRef("Swap", {{"T", DT_RESOURCE}})}},
               kGPU),
          NDef("r1", "Identity", {"y:0"}, {{"T", DT_RESOURCE}}),
          NDef("r2", "Identity", {"y:0"}, {{"T", DT_RESOURCE}}, kCPU),
          NDef("r3", "Identity", {"y:1"}, {{"T", DT_RESOURCE}}),
      },
      // FunctionLib
      {func});

  Graph g(OpRegistry::Global());
  // ASSERT (not EXPECT): stop on setup failure; the expectations below
  // assume a successfully built and placed graph.
  TF_ASSERT_OK(BuildGraph(graph, &g));
  TF_ASSERT_OK(CallOptPassesAndPlace(&g, false, true));

  EXPECT_DEVICE_TYPE(g, "a", "FakeCPU");
  EXPECT_DEVICE_TYPE(g, "y", "FakeGPU");
  EXPECT_DEVICE_TYPE(g, "r1", "FakeCPU");
  EXPECT_DEVICE_TYPE(g, "r2", "FakeCPU");
  EXPECT_DEVICE_TYPE(g, "r3", "FakeCPU");
}
2541
TEST_F(NestedPlacerTest, DuplicateInputResource_Conflict) {
  /*
   *                a:RESOURCE
   *               /  \
   *              |    |
   *              v    v
   *              PCO:GPU (simple swap)
   *                |   \
   *                |    v
   *                v     r2:RESOURCE:CPU
   *                r1:RESOURCE:GPU
   *
   * There is a conflict but Placer always overrides requested devices
   * when they result in conflict due to resource edges. Which device
   * is picked for a/r1/r2 is nondeterministic.
   */
  FunctionDef func = test::function::Swap();
  GraphDef graph = GDef(
      {
          NDef("a", "_Arg", {}, {{"T", DT_RESOURCE}}),
          NDef("y", "PartitionedCall", {"a", "a"},
               {{"Tin", DataTypeSlice{DT_RESOURCE, DT_RESOURCE}},
                {"Tout", DataTypeSlice{DT_RESOURCE, DT_RESOURCE}},
                {"f", FDH::FunctionRef("Swap", {{"T", DT_RESOURCE}})}},
               kGPU),
          NDef("r1", "Identity", {"y:0"}, {{"T", DT_RESOURCE}}, kGPU),
          NDef("r2", "Identity", {"y:1"}, {{"T", DT_RESOURCE}}, kCPU),
      },
      // FunctionLib
      {func});

  Graph g(OpRegistry::Global());
  // ASSERT (not EXPECT): the same-type checks below require a fully built
  // and placed graph.
  TF_ASSERT_OK(BuildGraph(graph, &g));
  TF_ASSERT_OK(CallOptPassesAndPlace(&g, false, true));

  // Only colocation is deterministic here, not the concrete device.
  EXPECT_SAME_TYPE(g, "a", "r1");
  EXPECT_SAME_TYPE(g, "a", "r2");
}
2580
TEST_F(NestedPlacerTest, TestDstDeviceIsIgnoredWhenConstrainedByResourceEdge) {
  /*
   *                a:RESOURCE:CPU
   *                |
   *                |
   *                v
   *                PCO (identity)
   *                |
   *                |
   *                v
   *                r1:RESOURCE:GPU
   *
   * r1's device will be overridden.
   */
  FunctionDef func = test::function::ResourceIdentity();
  GraphDef graph = GDef(
      {
          NDef("a", "_Arg", {}, {{"T", DT_RESOURCE}}, kCPU),
          NDef("y", "PartitionedCall", {"a"},
               {{"Tin", DataTypeSlice{DT_RESOURCE}},
                {"Tout", DataTypeSlice{DT_RESOURCE}},
                {"f", FDH::FunctionRef("ResourceIdentity", {})}}),
          NDef("r1", "_Retval", {"y:0"}, {{"T", DT_RESOURCE}},
               kGPU  // This device specification will be overridden
               ),
      },
      // FunctionLib
      {func});

  Graph g(OpRegistry::Global());
  // ASSERT (not EXPECT): abort on setup failure instead of checking device
  // types on a half-built graph.
  TF_ASSERT_OK(BuildGraph(graph, &g));
  TF_ASSERT_OK(CallOptPassesAndPlace(&g));

  EXPECT_DEVICE_TYPE(g, "a", "FakeCPU");
  EXPECT_DEVICE_TYPE(g, "r1", "FakeCPU");
}
2617
TEST_F(
    NestedPlacerTest,
    TestDstDeviceIsIgnoredWhenConstrainedByResourceEdge_EvenWhenPCOIsPlaced) {
  /*
   *                a:RESOURCE:CPU
   *                |
   *                |
   *                v
   *                PCO:GPU (identity)
   *                |
   *                |
   *                v
   *                r1:RESOURCE:GPU
   *
   * r1's device will be overridden.
   */
  FunctionDef func = test::function::ResourceIdentity();
  GraphDef graph = GDef(
      {
          NDef("a", "_Arg", {}, {{"T", DT_RESOURCE}}, kCPU),
          NDef("y", "PartitionedCall", {"a"},
               {{"Tin", DataTypeSlice{DT_RESOURCE}},
                {"Tout", DataTypeSlice{DT_RESOURCE}},
                {"f", FDH::FunctionRef("ResourceIdentity", {})}},
               kGPU),
          NDef("r1", "_Retval", {"y:0"}, {{"T", DT_RESOURCE}},
               kGPU  // This device specification will be overridden
               ),
      },
      // FunctionLib
      {func});

  Graph g(OpRegistry::Global());
  // ASSERT (not EXPECT): abort on setup failure instead of checking device
  // types on a half-built graph.
  TF_ASSERT_OK(BuildGraph(graph, &g));
  TF_ASSERT_OK(CallOptPassesAndPlace(&g));

  EXPECT_DEVICE_TYPE(g, "r1", "FakeCPU");
  EXPECT_DEVICE_TYPE(g, "y", "FakeGPU");
}
2657
TEST_F(NestedPlacerTest, ResourceConflictInvolvingPCO) {
  /*
   *                a:RESOURCE:CPU
   *                |
   *                |
   *                v
   *                PCO (identity)
   *                |
   *                |   b:RESOURCE:GPU
   *                |   |
   *                v   v
   *                Add:RESOURCE
   *
   * Add op cannot be placed because the requested devices are on
   * resource generating ops and they conflict.
   */
  FunctionDef func = test::function::ResourceIdentity();
  GraphDef graph = GDef(
      {
          NDef("a", "_Arg", {}, {{"T", DT_RESOURCE}}, kCPU),
          NDef("b", "_Arg", {}, {{"T", DT_RESOURCE}}, kGPU),
          NDef("y", "PartitionedCall", {"a"},
               {{"Tin", DataTypeSlice{DT_RESOURCE}},
                {"Tout", DataTypeSlice{DT_RESOURCE}},
                {"f", FDH::FunctionRef("ResourceIdentity", {})}}),
          NDef("add", "Add", {"y:0", "b"}, {{"T", DT_RESOURCE}}),
      },
      // FunctionLib
      {func});

  Graph g(OpRegistry::Global());
  // ASSERT (not EXPECT): the placement error below is only meaningful if the
  // graph was actually built.
  TF_ASSERT_OK(BuildGraph(graph, &g));
  Status s = CallOptPassesAndPlace(&g);
  EXPECT_EQ(error::INVALID_ARGUMENT, s.code()) << s.ToString();
  EXPECT_TRUE(absl::StrContains(
      s.error_message(),
      "Cannot place the graph because a reference or resource edge connects "
      "colocation groups with incompatible resource devices: /device:FakeCPU:0 "
      "vs /device:FakeGPU:0"))
      << s.ToString();
}
2699
TEST_F(NestedPlacerTest, ResourceConflictInvolvingTwoPCOs) {
  /*
   *             a:RESOURCE:CPU
   *             |
   *             |        b:RESOURCE:GPU
   *             |        |
   *             v        |
   *   y:PCO (identity)   |
   *             |        v
   *              \    z:PCO (identity)
   *               \     /
   *                \   /
   *                 v v
   *              Add:RESOURCE
   *
   * Add op cannot be placed.
   */
  FunctionDef func = test::function::ResourceIdentity();
  GraphDef graph = GDef(
      {
          NDef("a", "_Arg", {}, {{"T", DT_RESOURCE}}, kCPU),
          NDef("b", "_Arg", {}, {{"T", DT_RESOURCE}}, kGPU),
          NDef("y", "PartitionedCall", {"a"},
               {{"Tin", DataTypeSlice{DT_RESOURCE}},
                {"Tout", DataTypeSlice{DT_RESOURCE}},
                {"f", FDH::FunctionRef("ResourceIdentity", {})}}),
          NDef("z", "PartitionedCall", {"b"},
               {{"Tin", DataTypeSlice{DT_RESOURCE}},
                {"Tout", DataTypeSlice{DT_RESOURCE}},
                {"f", FDH::FunctionRef("ResourceIdentity", {})}}),
          NDef("add", "Add", {"y:0", "z:0"}, {{"T", DT_RESOURCE}}),
      },
      // FunctionLib
      {func});

  Graph g(OpRegistry::Global());
  // ASSERT (not EXPECT): the placement error below is only meaningful if the
  // graph was actually built.
  TF_ASSERT_OK(BuildGraph(graph, &g));

  Status s = CallOptPassesAndPlace(&g);
  EXPECT_EQ(error::INVALID_ARGUMENT, s.code()) << s.ToString();
  EXPECT_TRUE(absl::StrContains(
      s.error_message(),
      "Cannot place the graph because a reference or resource edge connects "
      "colocation groups with incompatible resource devices: /device:FakeCPU:0 "
      "vs /device:FakeGPU:0"))
      << s.ToString();
}
2747
// Function that returns a resource that can be produced on CPU only.
//
// Signature: (x: float) -> (ds: resource, x_out: float). The resource comes
// from a "CreateDatasetCPU" node (per the comment above, an op that can only
// produce its resource on CPU); `x` is passed straight through as `x_out`.
FunctionDef CPUResourceOutput() {
  return FDH::Create(
      // Name
      "CPUResourceOutput",
      // Args
      {"x: float"},
      // Return values
      {"ds: resource", "x_out: float"},
      // Attr def
      {},
      // Nodes
      {
          {{"make_ds"}, "CreateDatasetCPU", {}},
      },
      {{"ds", "make_ds:o:0"}, {"x_out", "x"}});
}
2765
TEST_F(NestedPlacerTest, DeepDeviceConstraintsPropagated) {
  /*
   *                a:FLOAT
   *                |
   *                v
   *                PCO (CPUResourceOutput)
   *                |  |
   *                |  v
   *                | (ignored)
   *                |
   *                v
   *                id:Identity:GPU (assigned)
   *
   * The graph cannot be placed because the PCO can produce the resource
   * on CPU only.
   */
  FunctionDef func = CPUResourceOutput();
  GraphDef graph = GDef(
      {
          NDef("a", "_Arg", {}, {{"T", DT_FLOAT}}),
          NDef("y", "PartitionedCall", {"a"},
               {{"Tin", DataTypeSlice{DT_FLOAT}},
                {"Tout", DataTypeSlice{DT_RESOURCE, DT_FLOAT}},
                {"f", FDH::FunctionRef("CPUResourceOutput", {})}}),
          NDef("id", "Identity", {"y:0"}, {{"T", DT_RESOURCE}}),
      },
      // FunctionLib
      {func});

  Graph g(OpRegistry::Global());
  // ASSERT (not EXPECT): the assigned-device mutation and the expected
  // placement error below require a successfully built graph.
  TF_ASSERT_OK(BuildGraph(graph, &g));
  // Pin `id` to GPU via an *assigned* (not just requested) device.
  GetNodeByName(g, "id")->set_assigned_device_name(kFullGPU);

  Status s = CallOptPassesAndPlace(&g);
  EXPECT_EQ(error::INVALID_ARGUMENT, s.code()) << s.ToString();
  // TODO(b/129057603): When better error messages are implemented, this should
  // change.
  EXPECT_TRUE(absl::StrContains(
      s.error_message(), "Could not satisfy explicit device specification"))
      << s.ToString();
}
2807
// Function that wraps CPUResourceOutput in one more level of PartitionedCall,
// so the CPU-only resource constraint must propagate through a nested
// function body. Signature matches CPUResourceOutput:
// (x: float) -> (ds: resource, x_out: float).
FunctionDef NestedCPUResourceOutput() {
  return FDH::Create(
      // Name
      "NestedCPUResourceOutput",
      // Args
      {"x: float"},
      // Return values
      {"ds: resource", "x_out: float"},
      // Attr def
      {},
      // Nodes
      {
          // Single node: delegate everything to CPUResourceOutput.
          {{"y"},
           "PartitionedCall",
           {"x"},
           {{"Tin", DataTypeSlice{DT_FLOAT}},
            {"Tout", DataTypeSlice{DT_RESOURCE, DT_FLOAT}},
            {"f", FDH::FunctionRef("CPUResourceOutput", {})}}},
      },
      // Output mapping: forward both outputs of the inner call.
      {{"ds", "y:output:0"}, {"x_out", "y:output:1"}});
}
2829
TEST_F(NestedPlacerTest, NestedDeepDeviceConstraintsPropagated) {
  /*
   *                a:FLOAT
   *                |
   *                v
   *                PCO (NestedCPUResourceOutput)
   *                |  |
   *                |  v
   *                | (ignored)
   *                |
   *                v
   *                id:_Retval:GPU (assigned)
   *
   * The graph cannot be placed because the PCO can produce the resource
   * on CPU only.
   */
  GraphDef graph = GDef(
      {
          NDef("a", "_Arg", {}, {{"T", DT_FLOAT}}),
          NDef("y", "PartitionedCall", {"a"},
               {{"Tin", DataTypeSlice{DT_FLOAT}},
                {"Tout", DataTypeSlice{DT_RESOURCE, DT_FLOAT}},
                {"f", FDH::FunctionRef("NestedCPUResourceOutput", {})}}),
          NDef("id", "_Retval", {"y:0"}, {{"T", DT_RESOURCE}}),
      },
      // FunctionLib
      {CPUResourceOutput(), NestedCPUResourceOutput()});

  Graph g(OpRegistry::Global());
  // ASSERT (not EXPECT): the assigned-device mutation and the expected
  // placement error below require a successfully built graph.
  TF_ASSERT_OK(BuildGraph(graph, &g));
  // Pin `id` to GPU via an *assigned* (not just requested) device.
  GetNodeByName(g, "id")->set_assigned_device_name(kFullGPU);

  Status s = CallOptPassesAndPlace(&g);
  EXPECT_EQ(error::INVALID_ARGUMENT, s.code()) << s.ToString();
  // TODO(b/129057603): When better error messages are implemented, this should
  // change.
  EXPECT_TRUE(absl::StrContains(
      s.error_message(), "Could not satisfy explicit device specification"))
      << s.ToString();
}
2870
TEST_F(NestedPlacerTest, TwoFunctionsBackToBack) {
  /*
   *             a:RESOURCE:CPU
   *             |
   *             |        b:RESOURCE:GPU
   *             v        |
   *   y:PCO (identity)   |
   *             |        |
   *   w:PCO (identity)   |
   *             |        v
   *              \    z:PCO (identity)
   *               \     /
   *                \   /
   *                 v v
   *              Add:RESOURCE
   *
   * Add op cannot be placed.
   * Two PCOs back to back is a challenging case that required adding
   * IsolateDeepOpsPass.
   */
  FunctionDef func = test::function::ResourceIdentity();
  GraphDef graph = GDef(
      {
          NDef("a", "_Arg", {}, {{"T", DT_RESOURCE}}, kCPU),
          NDef("b", "_Arg", {}, {{"T", DT_RESOURCE}}, kGPU),
          NDef("y", "PartitionedCall", {"a"},
               {{"Tin", DataTypeSlice{DT_RESOURCE}},
                {"Tout", DataTypeSlice{DT_RESOURCE}},
                {"f", FDH::FunctionRef("ResourceIdentity", {})}}),
          NDef("w", "PartitionedCall", {"y:0"},
               {{"Tin", DataTypeSlice{DT_RESOURCE}},
                {"Tout", DataTypeSlice{DT_RESOURCE}},
                {"f", FDH::FunctionRef("ResourceIdentity", {})}}),
          NDef("z", "PartitionedCall", {"b"},
               {{"Tin", DataTypeSlice{DT_RESOURCE}},
                {"Tout", DataTypeSlice{DT_RESOURCE}},
                {"f", FDH::FunctionRef("ResourceIdentity", {})}}),
          NDef("add", "Add", {"w:0", "z:0"}, {{"T", DT_RESOURCE}}),
      },
      // FunctionLib
      {func});

  Graph g(OpRegistry::Global());
  // ASSERT (not EXPECT): the placement error below is only meaningful if the
  // graph was actually built.
  TF_ASSERT_OK(BuildGraph(graph, &g));

  Status s = CallOptPassesAndPlace(&g);
  EXPECT_EQ(error::INVALID_ARGUMENT, s.code()) << s.ToString();
  EXPECT_TRUE(absl::StrContains(
      s.error_message(),
      "Cannot place the graph because a reference or resource edge connects "
      "colocation groups with incompatible resource devices: /device:FakeCPU:0 "
      "vs /device:FakeGPU:0"))
      << s.ToString();
}
2925
// Function whose body reproduces the TwoFunctionsBackToBack graph: a CPU-only
// dataset flows through two chained ResourceIdentity calls, a GPU-only
// dataset through one, and both resources meet at an Add. Used to verify that
// Placer detects the same conflict inside a nested function body.
FunctionDef NestedCallFunctionsBackToBack() {
  return FDH::Create(
      // Name
      "NestedCallFunctionsBackToBack",
      // Args
      {},
      // Return values
      {"output: resource"},
      // Attr def
      {},
      // Nodes
      {
          // CPU-side chain: dataset -> identity -> identity.
          {{"cpu_ds"}, "CreateDatasetCPU", {}},
          {{"y"},
           "PartitionedCall",
           {"cpu_ds:o:0"},
           {{"Tin", DataTypeSlice{DT_RESOURCE}},
            {"Tout", DataTypeSlice{DT_RESOURCE}},
            {"f", FDH::FunctionRef("ResourceIdentity", {})}}},
          {{"w"},
           "PartitionedCall",
           {"y:output:0"},
           {{"Tin", DataTypeSlice{DT_RESOURCE}},
            {"Tout", DataTypeSlice{DT_RESOURCE}},
            {"f", FDH::FunctionRef("ResourceIdentity", {})}}},
          // GPU-side chain: dataset -> identity.
          {{"gpu_ds"}, "CreateDatasetGPU", {}},
          {{"z"},
           "PartitionedCall",
           {"gpu_ds:o:0"},
           {{"Tin", DataTypeSlice{DT_RESOURCE}},
            {"Tout", DataTypeSlice{DT_RESOURCE}},
            {"f", FDH::FunctionRef("ResourceIdentity", {})}}},
          // The two resource chains collide here.
          {{"add"}, "Add", {"w:output:0", "z:output:0"}, {{"T", DT_RESOURCE}}},
      },
      {{"output", "add:z:0"}});
}
2962
TEST_F(NestedPlacerTest, NestedTwoFunctionsBackToBack) {
  /*
   * Same as TwoFunctionsBackToBack above but the functions are invoked in
   * another function instead of the top level graph. This tests that Placer
   * isolates deep ops in nested function bodies.
   */
  FunctionDef func = NestedCallFunctionsBackToBack();
  GraphDef graph = GDef(
      {
          // NOTE(review): Tout is DT_FLOAT while the function declares
          // "output: resource" - looks inconsistent; confirm it is
          // intentional (the test only checks the placement error).
          NDef("y", "PartitionedCall", {},
               {{"Tin", {}},
                {"Tout", DataTypeSlice{DT_FLOAT}},
                {"f", FDH::FunctionRef("NestedCallFunctionsBackToBack", {})}}),
      },
      // FunctionLib
      {NestedCallFunctionsBackToBack(), test::function::ResourceIdentity()});

  Graph g(OpRegistry::Global());
  // ASSERT (not EXPECT): the placement error below is only meaningful if the
  // graph was actually built.
  TF_ASSERT_OK(BuildGraph(graph, &g));

  Status s = CallOptPassesAndPlace(&g);
  EXPECT_EQ(error::INVALID_ARGUMENT, s.code()) << s.ToString();
  EXPECT_TRUE(absl::StrContains(
      s.error_message(),
      "Nodes were connected by a reference or resource connection (requiring "
      "them to be on the same device), but the two nodes were assigned two "
      "different devices"))
      << s.ToString();
}
2992
// Function that calls itself through a PartitionedCall. Used to verify that
// Placer rejects direct recursion with an UNIMPLEMENTED error (see the
// DirectRecursion test below).
FunctionDef RecursiveResourceIdentity() {
  return FDH::Create(
      // Name
      "RecursiveResourceIdentity",
      // Args
      {"x: resource"},
      // Return values
      {"y: resource"},
      // Attr def
      {},
      // Nodes
      {
          // Self-call: the function body invokes itself.
          {{"out"},
           "PartitionedCall",
           {"x"},
           {{"Tin", DataTypeSlice{DT_RESOURCE}},
            {"Tout", DataTypeSlice{DT_RESOURCE}},
            {"f", FDH::FunctionRef("RecursiveResourceIdentity", {})}}},
      },
      // Output mapping
      {{"y", "out:output:0"}});
}
3015
TEST_F(NestedPlacerTest, DirectRecursion) {
  // Placing a graph that calls a directly-recursive function must fail with
  // UNIMPLEMENTED and a message naming the recursive call site.
  GraphDef graph = GDef(
      {
          NDef("a", "_Arg", {}, {{"T", DT_RESOURCE}}),
          NDef("y", "PartitionedCall", {"a"},
               {{"Tin", DataTypeSlice{DT_RESOURCE}},
                {"Tout", DataTypeSlice{DT_RESOURCE}},
                {"f", FDH::FunctionRef("RecursiveResourceIdentity", {})}}),
          NDef("r1", "_Retval", {"y:0"}, {{"T", DT_RESOURCE}}),
      },
      // FunctionLib
      {RecursiveResourceIdentity()});

  Graph g(OpRegistry::Global());
  // ASSERT (not EXPECT): the error checks below require a successfully built
  // graph.
  TF_ASSERT_OK(BuildGraph(graph, &g));

  Status s = CallOptPassesAndPlace(&g);
  EXPECT_EQ(error::UNIMPLEMENTED, s.code()) << s.ToString();
  EXPECT_TRUE(absl::StrContains(
      s.error_message(),
      "Recursive function calls are not supported. Node {{node out}} inside "
      "the body of {{function_node RecursiveResourceIdentity}} calls function "
      "{{function_node RecursiveResourceIdentity}}"))
      << s.ToString();
}
3041
// First half of a mutually-recursive pair: RecursiveF1 calls RecursiveF2,
// which calls RecursiveF1 again. Used by the IndirectRecursion test to
// verify that Placer detects recursion through intermediate functions.
FunctionDef RecursiveF1() {
  return FDH::Create(
      // Name
      "RecursiveF1",
      // Args
      {"x: resource"},
      // Return values
      {"y: resource"},
      // Attr def
      {},
      // Nodes
      {
          // Calls RecursiveF2, completing one hop of the cycle.
          {{"out"},
           "PartitionedCall",
           {"x"},
           {{"Tin", DataTypeSlice{DT_RESOURCE}},
            {"Tout", DataTypeSlice{DT_RESOURCE}},
            {"f", FDH::FunctionRef("RecursiveF2", {})}}},
      },
      // Output mapping
      {{"y", "out:output:0"}});
}
3064
// Second half of the mutually-recursive pair: RecursiveF2 calls back into
// RecursiveF1, closing the cycle exercised by the IndirectRecursion test.
FunctionDef RecursiveF2() {
  return FDH::Create(
      // Name
      "RecursiveF2",
      // Args
      {"x: resource"},
      // Return values
      {"y: resource"},
      // Attr def
      {},
      // Nodes
      {
          // Calls RecursiveF1, closing the recursion cycle.
          {{"out"},
           "PartitionedCall",
           {"x"},
           {{"Tin", DataTypeSlice{DT_RESOURCE}},
            {"Tout", DataTypeSlice{DT_RESOURCE}},
            {"f", FDH::FunctionRef("RecursiveF1", {})}}},
      },
      // Output mapping
      {{"y", "out:output:0"}});
}
3087
TEST_F(NestedPlacerTest, IndirectRecursion) {
  // Placing a graph that calls into a mutually-recursive pair (F1 -> F2 ->
  // F1) must fail with UNIMPLEMENTED and name the call that re-enters the
  // stack.
  GraphDef graph = GDef(
      {
          NDef("a", "_Arg", {}, {{"T", DT_RESOURCE}}),
          NDef("y", "PartitionedCall", {"a"},
               {{"Tin", DataTypeSlice{DT_RESOURCE}},
                {"Tout", DataTypeSlice{DT_RESOURCE}},
                {"f", FDH::FunctionRef("RecursiveF1", {})}}),
          NDef("r1", "_Retval", {"y:0"}, {{"T", DT_RESOURCE}}),
      },
      // FunctionLib
      {RecursiveF1(), RecursiveF2()});

  Graph g(OpRegistry::Global());
  // ASSERT (not EXPECT): the error checks below require a successfully built
  // graph.
  TF_ASSERT_OK(BuildGraph(graph, &g));

  Status s = CallOptPassesAndPlace(&g);
  EXPECT_EQ(error::UNIMPLEMENTED, s.code()) << s.ToString();
  EXPECT_TRUE(absl::StrContains(
      s.error_message(),
      "Recursive function calls are not supported. Node {{node out}} inside "
      "the body of {{function_node RecursiveF2}} calls function "
      "{{function_node RecursiveF1}} which is already present in the call "
      "stack"))
      << s.ToString();
}
3114
3115 } // namespace
3116 } // namespace tensorflow
3117