1 #pragma once 2 3 #include <array> 4 #include <cstdint> 5 #include <memory> 6 #include <stack> 7 #include <string> 8 #include <unordered_map> 9 #include <utility> 10 #include <vector> 11 12 #include <torch/csrc/profiler/events.h> 13 14 #include <c10/util/Exception.h> 15 16 namespace torch::profiler::impl::linux_perf { 17 18 /* 19 * Maximum number of events supported 20 * This stems from the hardware limitation on CPU performance counters, and the 21 * fact that we don't support time multiplexing just yet. 22 * Time multiplexing involves scaling the counter values proportional to 23 * the enabled and running time or running the workload multiple times. 24 */ 25 constexpr uint8_t MAX_EVENTS = 4; 26 27 struct PerfCounter { 28 uint64_t value; /* The value of the event */ 29 uint64_t time_enabled; /* for TIME_ENABLED */ 30 uint64_t time_running; /* for TIME_RUNNING */ 31 }; 32 33 /* 34 * Basic perf event handler for Android and Linux 35 */ 36 class PerfEvent { 37 public: PerfEvent(std::string & name)38 explicit PerfEvent(std::string& name) : name_(name) {} 39 40 PerfEvent& operator=(PerfEvent&& other) noexcept { 41 if (this != &other) { 42 fd_ = other.fd_; 43 other.fd_ = -1; 44 name_ = std::move(other.name_); 45 } 46 return *this; 47 } 48 PerfEvent(PerfEvent && other)49 PerfEvent(PerfEvent&& other) noexcept { 50 *this = std::move(other); 51 } 52 53 ~PerfEvent(); 54 55 /* Setup perf events with the Linux Kernel, attaches perf to this process 56 * using perf_event_open(2) */ 57 void Init(); 58 59 /* Stop incrementing hardware counters for this event */ 60 void Disable() const; 61 62 /* Start counting hardware event from this point on */ 63 void Enable() const; 64 65 /* Zero out the counts for this event */ 66 void Reset() const; 67 68 /* Returns PerfCounter values for this event from kernel, on non supported 69 * platforms this always returns zero */ 70 uint64_t ReadCounter() const; 71 72 private: 73 /* Name of the event */ 74 std::string name_; 75 76 int fd_ = -1; 77 }; 78 79 class PerfProfiler { 80 public: 81 /* Configure all the events and track them as individual PerfEvent */ 82 void Configure(std::vector<std::string>& event_names); 83 84 /* Enable events counting from here */ 85 void Enable(); 86 87 /* Disable counting and fill in the caller supplied container with delta 88 * calculated from the start count values since last Enable() */ 89 void Disable(perf_counters_t&); 90 91 private: 92 uint64_t CalcDelta(uint64_t start, uint64_t end) const; 93 void StartCounting() const; 94 void StopCounting() const; 95 96 std::vector<PerfEvent> events_; 97 std::stack<perf_counters_t> start_values_; 98 }; 99 } // namespace torch::profiler::impl::linux_perf 100