/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_PROFILER_UTILS_GROUP_EVENTS_H_
#define TENSORFLOW_CORE_PROFILER_UTILS_GROUP_EVENTS_H_

#include <deque>
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
#include "tensorflow/core/profiler/utils/xplane_visitor.h"

namespace tensorflow {
namespace profiler {

// Information required to connect events across threads. The first two fields
// specify the event types of parent and child events. In addition to matching
// the event types, both events should have stats of the stat types specified
// in parent_stat_types and child_stat_types, respectively, and their values
// should be the same.
42 struct InterThreadConnectInfo { 43 int64_t parent_event_type; 44 int64_t child_event_type; 45 std::vector<int64_t> parent_stat_types; 46 std::vector<int64_t> child_stat_types; 47 }; 48 49 struct GroupMetadata { 50 std::string name; 51 absl::flat_hash_set<int64_t> parents; 52 absl::flat_hash_set<int64_t> children; 53 }; 54 55 using GroupMetadataMap = 56 absl::flat_hash_map<int64_t /*group_id*/, GroupMetadata>; 57 58 // A wrapper for XEvent with parent and children pointers. Through these 59 // pointers, a tree of EventNode is formed. 60 class EventNode { 61 public: EventNode(XEventVisitor visitor)62 explicit EventNode(XEventVisitor visitor) : visitor_(std::move(visitor)) {} 63 64 EventNode(const EventNode& event_node) = delete; 65 EventNode& operator=(const EventNode&) = delete; 66 GetParents()67 const std::vector<EventNode*>& GetParents() const { return parents_; } 68 GetChildren()69 const std::vector<EventNode*>& GetChildren() const { return children_; } 70 AddChild(EventNode * child)71 void AddChild(EventNode* child) { 72 children_.push_back(child); 73 child->parents_.push_back(this); 74 } 75 GetGroupId()76 absl::optional<int64_t> GetGroupId() const { return group_id_; } 77 78 std::string GetGroupName() const; 79 80 void SetGroupId(int64_t group_id); 81 82 // Sets group_id for this node and its descendants. 83 void PropagateGroupId(int64_t group_id, GroupMetadataMap* group_metadata_map); 84 GetEventVisitor()85 const XEventVisitor& GetEventVisitor() const { return visitor_; } 86 87 absl::optional<XStatVisitor> GetContextStat(int64_t stat_type) const; 88 89 void AddStepName(absl::string_view step_name); 90 91 void SetIsEager(bool is_eager); 92 93 // Returns true if this event is part of eagerly executed op. 94 bool IsEager() const; 95 96 bool IsNestedIn(EventNode* parent); 97 98 // Returns the closest parent (including itself) of the given event type. 
99 const EventNode* FindParent(int64_t event_type) const; 100 SetRootLevel(int root_level)101 void SetRootLevel(int root_level) { root_level_ = root_level; } 102 RootLevel()103 int RootLevel() const { return root_level_; } 104 105 bool IsCompiledFunc() const; 106 107 // Compare two EventNodes based on start timestamp. 108 bool operator<(const EventNode& other) const { 109 return GetEventVisitor().TimestampPs() < 110 other.GetEventVisitor().TimestampPs(); 111 } 112 113 private: 114 XStat* FindOrAddStatByType(int64_t stat_type); 115 116 XEventVisitor visitor_; 117 std::vector<EventNode*> parents_; 118 std::vector<EventNode*> children_; 119 absl::optional<int64_t> group_id_; 120 // Root event level. 121 // By default root_level_ is set to 0, which means it is not a root event. 122 // Events with root_level_ greater than 0 are considered as root events. 123 int root_level_ = 0; 124 }; 125 126 using EventNodeMap = 127 absl::flat_hash_map<int64_t /*event_type*/, std::deque<EventNode>>; 128 129 using EventList = std::vector<EventNode*>; 130 131 struct ContextGroup { 132 std::vector<EventNode*> producers; 133 std::vector<EventNode*> consumers; 134 }; 135 136 using ContextGroupMap = absl::flat_hash_map< 137 int /*context_type*/, 138 absl::flat_hash_map<uint64 /*context_id*/, ContextGroup>>; 139 140 // EventForest augments the input XSpace with the trace context. The trace 141 // context is created by stitching XEvents (1) using the nesting relationship 142 // within the same thread and (2) comparing the semantic arguments or using 143 // connect_info_list across threads. It also groups the events by the root 144 // events specified in root_event_types or marked by the semantic argument. 
145 class EventForest { 146 public: 147 void AddSpace( 148 const std::function<XPlaneVisitor(const XPlane*)> visitor_factory, 149 XSpace* space); 150 151 void AddPlanes( 152 const std::function<XPlaneVisitor(const XPlane*)> visitor_factory, 153 const std::vector<XPlane*>& planes); 154 155 void ConnectEvents( 156 const std::vector<InterThreadConnectInfo>& connect_info_list = {}); 157 158 void ConnectTfDataEvents(); 159 160 void GroupEvents(); 161 GetEventNodeMap()162 const EventNodeMap& GetEventNodeMap() const { return event_node_map_; } 163 GetGroupMetadataMap()164 const GroupMetadataMap& GetGroupMetadataMap() const { 165 return group_metadata_map_; 166 } 167 168 private: 169 void AddPlane( 170 const std::function<XPlaneVisitor(const XPlane*)> visitor_factory, 171 XPlane* plane); 172 173 // Creates an EventNode for each event in event_node_map and connect events 174 // according to the nesting relationship within the thread. 175 void ConnectIntraThread(XPlane* plane, XPlaneVisitor* visitor, 176 ContextGroupMap* context_groups); 177 178 // Connects events across threads according to connect_info_list. 179 void ConnectInterThread( 180 const std::vector<InterThreadConnectInfo>& connect_info_list); 181 182 // Creates event groups and populates group_metadata_map. If a TF loop is 183 // used, each TF loop iteration becomes a root. Otherwise, top root events 184 // (i.e., none of their ancestors is a root event) are used as roots. A new 185 // group is created with all events reachable from a root. 186 void CreateEventGroups(); 187 188 // Sets the is_eager stat to true for the eagerly executed GPU kernel events. 189 void MarkEagerlyExecutedGpuKernels(); 190 191 // Sets the is_eager stat to true for the eagerly executed CPU TF op events. 192 void MarkEagerlyExecutedCpuTfOps(); 193 194 // Populate all the step ids that associated with tf.data pipeline. 195 // Because FunctionRun is considered as root, but we want to exclude those 196 // FunctionRuns from tf.data. 
197 void ProcessTfDataSteps(); 198 199 // Processes the TF loops and registers the first TF executor event of each 200 // iteraton to `tf_loop_root_events_`. 201 void ProcessTensorFlowLoop(); 202 203 // Processes the worker thread by connecting a FunctionRun with the following 204 // eager ops (e.g., for Keras callback). 205 void ProcessWorker(); 206 207 EventNodeMap event_node_map_; 208 std::vector<XPlaneVisitor> visitors_; 209 // std::deque for pointer stability. 210 std::deque<std::pair<XPlane*, XPlaneVisitor>> planes_; 211 // The "step" id (actually it is "function" id that are associated with 212 // the tf.data pipeline. 213 absl::flat_hash_set<int64_t> tf_data_step_ids_; 214 EventList tf_loop_root_events_; 215 GroupMetadataMap group_metadata_map_; 216 }; 217 218 std::vector<InterThreadConnectInfo> CreateInterThreadConnectInfoList(); 219 220 // Calls GroupEvents with connect_info_list and root_event_types specific to 221 // TensorFlow. 222 void GroupTfEvents(XSpace* space, EventForest* event_forest); 223 void GroupTfEvents(XSpace* space); 224 225 // Returns true if the given space has TF's loop ops. 226 bool CheckLoopOp(const XSpace& space); 227 228 } // namespace profiler 229 } // namespace tensorflow 230 231 #endif // TENSORFLOW_CORE_PROFILER_UTILS_GROUP_EVENTS_H_ 232