// Source: /aosp_15_r20/external/pytorch/torch/csrc/profiler/perf.h (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #pragma once
2 
3 #include <array>
4 #include <cstdint>
5 #include <memory>
6 #include <stack>
7 #include <string>
8 #include <unordered_map>
9 #include <utility>
10 #include <vector>
11 
12 #include <torch/csrc/profiler/events.h>
13 
14 #include <c10/util/Exception.h>
15 
16 namespace torch::profiler::impl::linux_perf {
17 
/*
 * Maximum number of events that can be counted at once.
 * The cap comes from the limited number of hardware performance counters on a
 * CPU, combined with the fact that time multiplexing is not supported yet.
 * Time multiplexing would mean either scaling the counter values in proportion
 * to the enabled and running times, or running the workload multiple times.
 *
 * Declared `inline` so that every translation unit including this header
 * refers to one single definition of the constant.
 */
inline constexpr uint8_t MAX_EVENTS = 4;
26 
/*
 * One reading of a perf event together with its timing fields.
 * Every member defaults to zero so that a default-constructed PerfCounter never
 * carries indeterminate values.
 * NOTE(review): the member order presumably mirrors the record layout the
 * kernel produces for this event - confirm against the reader before
 * reordering or adding members.
 */
struct PerfCounter {
  uint64_t value = 0; /* The value of the event */
  uint64_t time_enabled = 0; /* for TIME_ENABLED */
  uint64_t time_running = 0; /* for TIME_RUNNING */
};
32 
/*
 * Basic perf event handler for Android and Linux.
 *
 * A PerfEvent stores the name of the event and a file descriptor, which is -1
 * while no descriptor is held. The type is move-only.
 */
class PerfEvent {
 public:
  /* Creates an event called `name`. The name is copied; the descriptor starts
   * out as -1. */
  explicit PerfEvent(const std::string& name) : name_(name) {}

  /* Copying is not supported: the class declares its own move operations and
   * is meant to be moved only. */
  PerfEvent(const PerfEvent&) = delete;
  PerfEvent& operator=(const PerfEvent&) = delete;

  /* Move assignment. Takes over the name and descriptor of `other`. */
  PerfEvent& operator=(PerfEvent&& other) noexcept {
    if (this != &other) {
      /* Swap instead of overwriting, so a descriptor already held by *this is
       * handed to `other` rather than silently dropped. It is then expected to
       * be released when `other` is destroyed (the destructor is defined out
       * of line - confirm). When *this held no descriptor, `other` ends up
       * with -1. */
      std::swap(fd_, other.fd_);
      name_ = std::move(other.name_);
    }
    return *this;
  }

  /* Move construction. Steals the name and descriptor of `other`, leaving
   * `other` with descriptor -1. */
  PerfEvent(PerfEvent&& other) noexcept
      : name_(std::move(other.name_)), fd_(std::exchange(other.fd_, -1)) {}

  ~PerfEvent();

  /* Setup perf events with the Linux Kernel, attaches perf to this process
   * using perf_event_open(2) */
  void Init();

  /* Stop incrementing hardware counters for this event */
  void Disable() const;

  /* Start counting hardware event from this point on */
  void Enable() const;

  /* Zero out the counts for this event */
  void Reset() const;

  /* Returns the counter value for this event as read from the kernel; on
   * unsupported platforms this always returns zero */
  uint64_t ReadCounter() const;

 private:
  /* Name of the event */
  std::string name_;

  /* File descriptor for the event; -1 means no descriptor is held */
  int fd_ = -1;
};
78 
79 class PerfProfiler {
80  public:
81   /* Configure all the events and track them as individual PerfEvent */
82   void Configure(std::vector<std::string>& event_names);
83 
84   /* Enable events counting from here */
85   void Enable();
86 
87   /* Disable counting and fill in the caller supplied container with delta
88    * calculated from the start count values since last Enable() */
89   void Disable(perf_counters_t&);
90 
91  private:
92   uint64_t CalcDelta(uint64_t start, uint64_t end) const;
93   void StartCounting() const;
94   void StopCounting() const;
95 
96   std::vector<PerfEvent> events_;
97   std::stack<perf_counters_t> start_values_;
98 };
99 } // namespace torch::profiler::impl::linux_perf
100