#include <c10/core/Allocator.h>
#include <c10/util/Exception.h>
#include <c10/util/overloaded.h>
#include <torch/csrc/jit/mobile/profiler_edge.h>
#include <string>
#include <vector>

namespace torch::jit::mobile {

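// Thread-local handle to the edge profiler active on this thread, if any.
// Set by the KinetoEdgeCPUProfiler constructor, cleared by its destructor,
// and exposed through getCurrentEdgeProfiler() below.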
thread_local KinetoEdgeCPUProfiler* tls_edge_profiler{nullptr};

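// RAII guard that profiles execution of a mobile (lite interpreter) module:
// construction prepares and enables a Kineto CPU profiler scoped to
// LITE_INTERPRETER record functions; if `fname` is non-empty, the destructor
// saves the collected trace there. `events` optionally enables hardware
// performance counter collection. A usage sketch (the module `m` and its
// `inputs` are hypothetical names, not part of this file):
//
//   {
//     KinetoEdgeCPUProfiler profiler(
//         m,
//         "/tmp/trace.json",
//         /*report_input_shapes=*/true,
//         /*profile_memory=*/false,
//         /*with_stack=*/false,
//         /*with_flops=*/false,
//         /*with_modules=*/false,
//         /*events=*/{},
//         /*adjust_vulkan_timestamps=*/false);
//     m.forward(inputs);
//   } // profiler goes out of scope; trace is saved to /tmp/trace.json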
KinetoEdgeCPUProfiler::KinetoEdgeCPUProfiler(
    const torch::jit::mobile::Module& m,
    const std::string& fname,
    const bool report_input_shapes,
    const bool profile_memory,
    const bool with_stack,
    const bool with_flops,
    const bool with_modules,
    std::vector<std::string> events,
    const bool adjust_vulkan_timestamps)
    : m_(m), trace_file_name_(fname) {
  torch::profiler::impl::ExperimentalConfig experimental_config;
  // Enable hardware performance counters
  if (!events.empty()) {
    experimental_config.performance_events = std::move(events);
  }

  // Adjust Vulkan timestamps from the query pool to align with CPU event times
  experimental_config.adjust_timestamps = adjust_vulkan_timestamps;

  torch::profiler::impl::ProfilerConfig config(
      torch::profiler::impl::ProfilerState::KINETO,
      report_input_shapes,
      profile_memory,
      with_stack,
      with_flops,
      with_modules,
      experimental_config);
  torch::autograd::profiler::prepareProfiler(
      config, {torch::autograd::profiler::ActivityType::CPU});
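  // When module hierarchy or call-stack reporting is requested, install a
  // post-processing callback that maps each event's debug handle back to
  // the module hierarchy / source call stack stored in the model's debug
  // information.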
  if (with_modules || with_stack) {
    auto post_processing = [this, with_stack, with_modules](
                               int64_t debug_handle,
                               std::vector<std::string>& jit_stack,
                               std::vector<std::string>& jit_modules) {
      std::string no_debug_info("Model was not saved with debug information");
      if (with_modules) {
        // Since KinetoEvent's module hierarchy takes a vector of strings,
        // we construct a temporary vector with a single string element.
        jit_modules = std::vector<std::string>(
            {this->m_.hasDebugHandles()
                 ? this->m_.getModuleHierarchy(debug_handle)
                 : no_debug_info});
      } else if (with_stack) {
        // Since KinetoEvent's stack trace takes a vector of strings,
        // we construct a temporary vector with a single string element.
        jit_stack = std::vector<std::string>(
            {this->m_.hasDebugHandles() ? this->m_.getCallStack(debug_handle)
                                        : no_debug_info});
      }
    };
    torch::autograd::profiler::enableProfilerWithEventPostProcess(
        config,
        {torch::autograd::profiler::ActivityType::CPU},
        post_processing,
        {at::RecordScope::LITE_INTERPRETER});
  } else {
    torch::autograd::profiler::enableProfiler(
        config,
        {torch::autograd::profiler::ActivityType::CPU},
        {at::RecordScope::LITE_INTERPRETER});
  }
  TORCH_CHECK(
      tls_edge_profiler == nullptr, "Edge profiler is already profiling.");
  tls_edge_profiler = this;
}

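// Forwards a backend allocation or deallocation to the active profiler's
// memory tracker via c10::reportMemoryUsageToProfiler; by c10 convention a
// negative alloc_size denotes a deallocation.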
void KinetoEdgeCPUProfiler::recordBackendMemoryEvent(
    void* ptr,
    int64_t alloc_size,
    size_t total_allocated,
    size_t total_reserved,
    c10::Device device) {
  c10::reportMemoryUsageToProfiler(
      ptr, alloc_size, total_allocated, total_reserved, device);
}

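// Reports an event executed by a delegated backend to the active Kineto
// profiler under the LITE_INTERPRETER scope, so it can be correlated with
// interpreter events through its debug handle.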
void KinetoEdgeCPUProfiler::recordBackendEvent(
    const int64_t start_time_us,
    const int64_t end_time_us,
    const int64_t debug_handle,
    const std::string& event_name,
    const std::string& backend_name) {
  torch::autograd::profiler::reportBackendEventToActiveKinetoProfiler(
      start_time_us,
      end_time_us,
      debug_handle,
      at::RecordScope::LITE_INTERPRETER,
      event_name,
      backend_name);
}

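// Stops profiling and caches the result. Must be called at most once per
// profiler instance; afterwards the result is retrieved via
// getProfilerResult().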
const std::unique_ptr<torch::autograd::profiler::ProfilerResult>&
KinetoEdgeCPUProfiler::disableProfiler() {
  TORCH_CHECK(
      !profiler_result_,
      "KinetoEdgeCPUProfiler already disabled. "
      "To get the list of events use getProfilerResult().");
  profiler_result_ = torch::autograd::profiler::disableProfiler();
  return profiler_result_;
}

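// Returns the result cached by disableProfiler(), which must have been
// called first.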
const std::unique_ptr<torch::autograd::profiler::ProfilerResult>&
KinetoEdgeCPUProfiler::getProfilerResult() {
  TORCH_CHECK(
      profiler_result_,
      "KinetoEdgeCPUProfiler has not been disabled. "
      "Use the disableProfiler() API first, which returns the ProfilerResult.");
  return profiler_result_;
}

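// If a trace file name was supplied, ensure the trace is saved: use the
// cached result when the profiler was already disabled, otherwise disable
// it now and save directly. Then clear the thread-local registration.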
KinetoEdgeCPUProfiler::~KinetoEdgeCPUProfiler() {
  if (!trace_file_name_.empty()) {
    if (profiler_result_) {
      profiler_result_->save(trace_file_name_);
    } else {
      torch::autograd::profiler::disableProfiler()->save(trace_file_name_);
    }
  }
  tls_edge_profiler = nullptr;
}

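// Returns the edge profiler active on the current thread, or nullptr.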
KinetoEdgeCPUProfiler* getCurrentEdgeProfiler() {
  return tls_edge_profiler;
}

} // namespace torch::jit::mobile