xref: /aosp_15_r20/external/pytorch/torch/csrc/jit/mobile/profiler_edge.h (revision da0073e96a02ea20f0ac840b70461e3646d07c45)
1 #pragma once
2 #include <torch/csrc/autograd/profiler_kineto.h>
3 #include <torch/csrc/jit/mobile/module.h>
4 
5 namespace torch::jit::mobile {
6 
// If we don't have Kineto available, then the edge profiler does not
// work, since it relies on Kineto.
9 #ifdef USE_KINETO
class TORCH_API KinetoEdgeCPUProfiler {
 public:
  // This profiler only profiles KINETO events
  // No GPU_FALLBACK or NVTX
  /*
   * @param m is the instance of mobile Module which is being profiled.
   *        Note that this implies that KinetoEdgeCPUProfiler can be used
   *        to profile a specific Module (see usage below), unlike
   *        ProfilerKineto which can profile the pytorch runtime in an
   *        arbitrary scope.
   * @param fname is the name of the file to which the chrome trace is
   *        written.
   * @param report_input_shapes: whether to record shapes of op's inputs.
   * @param profile_memory: whether to record memory allocation events
   *        (see recordBackendMemoryEvent below).
   * @param with_stack: whether to record model's python stacktrace for the op.
   * @param with_flops: whether to report flops corresponding to the op.
   * @param with_modules: whether to report original python module
   *        hierarchy to which the op belongs.
   * @param events: additional event names to collect during profiling.
   *        NOTE(review): presumably performance-counter event names
   *        forwarded to Kineto -- confirm against profiler_edge.cpp.
   * @param adjust_vulkan_timestamps: whether to adjust vulkan timestamps from
   *        query pool to align with cpu event times.
   *
   * Usage pattern for this profiler must be as follows:
   *
   * {
   *   KinetoEdgeCPUProfiler profiler(m, filename, args);
   *   m.forward(...);
   * }
   *
   * (Note the profiler must be a *named* local: an unnamed temporary
   * would be destroyed at the end of the full expression, before
   * m.forward(...) runs.)
   *
   * The reason being that KinetoEdgeCPUProfiler has a dependency on Module
   * and thus it must not outlive it.
   *
   * Thus KinetoEdgeCPUProfiler is used as an RAII guard to do profiling
   * within a certain scope. In that scope, the captured reference to
   * Module will outlive KinetoEdgeCPUProfiler. This is guaranteed because
   * KinetoEdgeCPUProfiler must be constructed later than Module, on stack.
   *
   * An example of the anti-pattern and wrong usage is:
   *
   * auto profiler =
   *     std::make_shared<KinetoEdgeCPUProfiler>(m, filename, args);
   * m.forward(...);
   *
   * since the KinetoEdgeCPUProfiler object would then be constructed on
   * the heap with its lifetime managed manually or via smart pointers,
   * and nothing ties that lifetime to the Module it references.
   */
  KinetoEdgeCPUProfiler(
      const torch::jit::mobile::Module& m,
      const std::string& fname,
      const bool report_input_shapes = false,
      const bool profile_memory = false,
      const bool with_stack = false,
      const bool with_flops = false,
      const bool with_modules = false,
      std::vector<std::string> events = {},
      const bool adjust_vulkan_timestamps = false);

  // Stops the profiling session and returns the collected result.
  // The returned reference is owned by this profiler and is only valid
  // for the profiler's lifetime.
  const std::unique_ptr<torch::autograd::profiler::ProfilerResult>&
  disableProfiler();
  // Accessor for the stored result. NOTE(review): presumably only
  // meaningful after disableProfiler() has run -- confirm in the .cpp.
  const std::unique_ptr<torch::autograd::profiler::ProfilerResult>&
  getProfilerResult();
  // Records an op event reported by a backend/delegate, with explicit
  // start/end timestamps (microseconds, per the _us suffix) and a debug
  // handle identifying the op.
  void recordBackendEvent(
      const int64_t start_time_us,
      const int64_t end_time_us,
      const int64_t debug_handle,
      const std::string& event_name,
      const std::string& backend_name);
  // Records a memory event from a backend allocator. NOTE(review):
  // alloc_size is signed -- presumably negative denotes a free; confirm.
  void recordBackendMemoryEvent(
      void* ptr,
      int64_t alloc_size,
      size_t total_allocated,
      size_t total_reserved,
      c10::Device device);

  // End of the RAII scope; see the usage note in the class comment.
  ~KinetoEdgeCPUProfiler();

 private:
  /*
   * We store a reference to Module to make this dependency explicit, since
   * a Module reference is already stored in a functor.
   */
  const mobile::Module& m_;
  // Destination file for the chrome trace (see fname ctor parameter).
  std::string trace_file_name_;
  // Result of the profiling session; populated by disableProfiler().
  std::unique_ptr<torch::autograd::profiler::ProfilerResult> profiler_result_;
};
91 
92 TORCH_API KinetoEdgeCPUProfiler* getCurrentEdgeProfiler();
93 
// Records a backend op event on the active edge profiler; a no-op when
// no profiler is running. The expansion is wrapped in do { } while (0)
// so the macro behaves as a single statement: a bare `if { }` expansion
// would silently capture a following `else` at the call site
// (dangling-else hazard). The C++17 if-initializer also calls
// getCurrentEdgeProfiler() only once instead of twice.
#define RECORD_BACKEND_EVENT_TO_EDGE_PROFILER(                                 \
    start_time_us, end_time_us, debug_handle, event_name, backend_name)        \
  do {                                                                         \
    if (auto* edge_profiler_ptr = mobile::getCurrentEdgeProfiler()) {          \
      edge_profiler_ptr->recordBackendEvent(                                   \
          start_time_us, end_time_us, debug_handle, event_name, backend_name); \
    }                                                                          \
  } while (0)
100 
// Records a backend memory event on the active edge profiler; a no-op
// when no profiler is running. Wrapped in do { } while (0) so the macro
// expands to a single statement (avoids the dangling-else hazard of a
// bare `if { }`), and the C++17 if-initializer calls
// getCurrentEdgeProfiler() only once instead of twice.
#define RECORD_BACKEND_MEMORY_EVENT_TO_EDGE_PROFILER(                \
    ptr, alloc_size, total_allocated, total_reserved, device)        \
  do {                                                               \
    if (auto* edge_profiler_ptr = mobile::getCurrentEdgeProfiler()) {\
      edge_profiler_ptr->recordBackendMemoryEvent(                   \
          ptr, alloc_size, total_allocated, total_reserved, device); \
    }                                                                \
  } while (0)
107 #else
108 
// Without Kineto the recording macros expand to nothing, so call sites
// need no #ifdef USE_KINETO guards of their own (a trailing `;` at the
// call site becomes a harmless empty statement).
#define RECORD_BACKEND_EVENT_TO_EDGE_PROFILER( \
    start_time_us, end_time_us, debug_handle, event_name, backend_name)

#define RECORD_BACKEND_MEMORY_EVENT_TO_EDGE_PROFILER( \
    ptr, alloc_size, total_allocated, total_reserved, device)
114 #endif
115 } // namespace torch::jit::mobile
116