xref: /aosp_15_r20/external/tensorflow/tensorflow/core/profiler/utils/group_events.h (revision b6fb3261f9314811a0f4371741dbb8839866f948)
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_PROFILER_UTILS_GROUP_EVENTS_H_
17 #define TENSORFLOW_CORE_PROFILER_UTILS_GROUP_EVENTS_H_
18 
19 #include <deque>
20 #include <functional>
21 #include <memory>
22 #include <string>
23 #include <utility>
24 #include <vector>
25 
26 #include "absl/container/flat_hash_map.h"
27 #include "absl/container/flat_hash_set.h"
28 #include "absl/strings/string_view.h"
29 #include "absl/types/optional.h"
30 #include "tensorflow/core/platform/logging.h"
31 #include "tensorflow/core/platform/types.h"
32 #include "tensorflow/core/profiler/protobuf/xplane.pb.h"
33 #include "tensorflow/core/profiler/utils/xplane_visitor.h"
34 
35 namespace tensorflow {
36 namespace profiler {
37 
// Information required to connect events across threads.
//
// `parent_event_type` and `child_event_type` specify the event types of the
// parent and child events. In addition to matching the event types, both
// events should carry the stats named by the stat-type lists below, and the
// values of those stats should be the same.
// NOTE(review): how `parent_stat_types` and `child_stat_types` are paired
// (e.g. positionally, or what an empty `child_stat_types` means) is decided
// by the code that consumes this struct; confirm there before relying on it.
struct InterThreadConnectInfo {
  // Event type of the parent side of the connection.
  int64_t parent_event_type;
  // Event type of the child side of the connection.
  int64_t child_event_type;
  // Stat types looked up on the parent event.
  std::vector<int64_t> parent_stat_types;
  // Stat types looked up on the child event.
  std::vector<int64_t> child_stat_types;
};
48 
49 struct GroupMetadata {
50   std::string name;
51   absl::flat_hash_set<int64_t> parents;
52   absl::flat_hash_set<int64_t> children;
53 };
54 
55 using GroupMetadataMap =
56     absl::flat_hash_map<int64_t /*group_id*/, GroupMetadata>;
57 
58 // A wrapper for XEvent with parent and children pointers. Through these
59 // pointers, a tree of EventNode is formed.
60 class EventNode {
61  public:
EventNode(XEventVisitor visitor)62   explicit EventNode(XEventVisitor visitor) : visitor_(std::move(visitor)) {}
63 
64   EventNode(const EventNode& event_node) = delete;
65   EventNode& operator=(const EventNode&) = delete;
66 
GetParents()67   const std::vector<EventNode*>& GetParents() const { return parents_; }
68 
GetChildren()69   const std::vector<EventNode*>& GetChildren() const { return children_; }
70 
AddChild(EventNode * child)71   void AddChild(EventNode* child) {
72     children_.push_back(child);
73     child->parents_.push_back(this);
74   }
75 
GetGroupId()76   absl::optional<int64_t> GetGroupId() const { return group_id_; }
77 
78   std::string GetGroupName() const;
79 
80   void SetGroupId(int64_t group_id);
81 
82   // Sets group_id for this node and its descendants.
83   void PropagateGroupId(int64_t group_id, GroupMetadataMap* group_metadata_map);
84 
GetEventVisitor()85   const XEventVisitor& GetEventVisitor() const { return visitor_; }
86 
87   absl::optional<XStatVisitor> GetContextStat(int64_t stat_type) const;
88 
89   void AddStepName(absl::string_view step_name);
90 
91   void SetIsEager(bool is_eager);
92 
93   // Returns true if this event is part of eagerly executed op.
94   bool IsEager() const;
95 
96   bool IsNestedIn(EventNode* parent);
97 
98   // Returns the closest parent (including itself) of the given event type.
99   const EventNode* FindParent(int64_t event_type) const;
100 
SetRootLevel(int root_level)101   void SetRootLevel(int root_level) { root_level_ = root_level; }
102 
RootLevel()103   int RootLevel() const { return root_level_; }
104 
105   bool IsCompiledFunc() const;
106 
107   // Compare two EventNodes based on start timestamp.
108   bool operator<(const EventNode& other) const {
109     return GetEventVisitor().TimestampPs() <
110            other.GetEventVisitor().TimestampPs();
111   }
112 
113  private:
114   XStat* FindOrAddStatByType(int64_t stat_type);
115 
116   XEventVisitor visitor_;
117   std::vector<EventNode*> parents_;
118   std::vector<EventNode*> children_;
119   absl::optional<int64_t> group_id_;
120   // Root event level.
121   // By default root_level_ is set to 0, which means it is not a root event.
122   // Events with root_level_ greater than 0 are considered as root events.
123   int root_level_ = 0;
124 };
125 
126 using EventNodeMap =
127     absl::flat_hash_map<int64_t /*event_type*/, std::deque<EventNode>>;
128 
129 using EventList = std::vector<EventNode*>;
130 
131 struct ContextGroup {
132   std::vector<EventNode*> producers;
133   std::vector<EventNode*> consumers;
134 };
135 
136 using ContextGroupMap = absl::flat_hash_map<
137     int /*context_type*/,
138     absl::flat_hash_map<uint64 /*context_id*/, ContextGroup>>;
139 
140 // EventForest augments the input XSpace with the trace context. The trace
141 // context is created by stitching XEvents (1) using the nesting relationship
142 // within the same thread and (2) comparing the semantic arguments or using
143 // connect_info_list across threads. It also groups the events by the root
144 // events specified in root_event_types or marked by the semantic argument.
145 class EventForest {
146  public:
147   void AddSpace(
148       const std::function<XPlaneVisitor(const XPlane*)> visitor_factory,
149       XSpace* space);
150 
151   void AddPlanes(
152       const std::function<XPlaneVisitor(const XPlane*)> visitor_factory,
153       const std::vector<XPlane*>& planes);
154 
155   void ConnectEvents(
156       const std::vector<InterThreadConnectInfo>& connect_info_list = {});
157 
158   void ConnectTfDataEvents();
159 
160   void GroupEvents();
161 
GetEventNodeMap()162   const EventNodeMap& GetEventNodeMap() const { return event_node_map_; }
163 
GetGroupMetadataMap()164   const GroupMetadataMap& GetGroupMetadataMap() const {
165     return group_metadata_map_;
166   }
167 
168  private:
169   void AddPlane(
170       const std::function<XPlaneVisitor(const XPlane*)> visitor_factory,
171       XPlane* plane);
172 
173   // Creates an EventNode for each event in event_node_map and connect events
174   // according to the nesting relationship within the thread.
175   void ConnectIntraThread(XPlane* plane, XPlaneVisitor* visitor,
176                           ContextGroupMap* context_groups);
177 
178   // Connects events across threads according to connect_info_list.
179   void ConnectInterThread(
180       const std::vector<InterThreadConnectInfo>& connect_info_list);
181 
182   // Creates event groups and populates group_metadata_map. If a TF loop is
183   // used, each TF loop iteration becomes a root. Otherwise, top root events
184   // (i.e., none of their ancestors is a root event) are used as roots. A new
185   // group is created with all events reachable from a root.
186   void CreateEventGroups();
187 
188   // Sets the is_eager stat to true for the eagerly executed GPU kernel events.
189   void MarkEagerlyExecutedGpuKernels();
190 
191   // Sets the is_eager stat to true for the eagerly executed CPU TF op events.
192   void MarkEagerlyExecutedCpuTfOps();
193 
194   // Populate all the step ids that associated with tf.data pipeline.
195   // Because FunctionRun is considered as root, but we want to exclude those
196   // FunctionRuns from tf.data.
197   void ProcessTfDataSteps();
198 
199   // Processes the TF loops and registers the first TF executor event of each
200   // iteraton to `tf_loop_root_events_`.
201   void ProcessTensorFlowLoop();
202 
203   // Processes the worker thread by connecting a FunctionRun with the following
204   // eager ops (e.g., for Keras callback).
205   void ProcessWorker();
206 
207   EventNodeMap event_node_map_;
208   std::vector<XPlaneVisitor> visitors_;
209   // std::deque for pointer stability.
210   std::deque<std::pair<XPlane*, XPlaneVisitor>> planes_;
211   // The "step" id (actually it is "function" id that are associated with
212   // the tf.data pipeline.
213   absl::flat_hash_set<int64_t> tf_data_step_ids_;
214   EventList tf_loop_root_events_;
215   GroupMetadataMap group_metadata_map_;
216 };
217 
218 std::vector<InterThreadConnectInfo> CreateInterThreadConnectInfoList();
219 
220 // Calls GroupEvents with connect_info_list and root_event_types specific to
221 // TensorFlow.
222 void GroupTfEvents(XSpace* space, EventForest* event_forest);
223 void GroupTfEvents(XSpace* space);
224 
225 // Returns true if the given space has TF's loop ops.
226 bool CheckLoopOp(const XSpace& space);
227 
228 }  // namespace profiler
229 }  // namespace tensorflow
230 
231 #endif  // TENSORFLOW_CORE_PROFILER_UTILS_GROUP_EVENTS_H_
232