1 #pragma once 2 #include <torch/csrc/autograd/profiler_kineto.h> 3 #include <torch/csrc/jit/mobile/module.h> 4 5 namespace torch::jit::mobile { 6 7 // If we dont have kineto available then edge profiler does not 8 // work since it relies on Kineto 9 #ifdef USE_KINETO 10 class TORCH_API KinetoEdgeCPUProfiler { 11 public: 12 // This profiler only profiles KINETO events 13 // No GPU_FALLBACK or NVTX 14 /* 15 * @param m is the instance of mobile Module which is being profiled. 16 * Note that this implies that KinetoEdgeCPUProfiler can be used 17 * to profile specific Module (see usage below), unliked ProfilerKineto 18 * which can profile pytorch runtime in arbitrary scope. 19 * @param fname is the name of the file to which chrome trace is written. 20 * @param report_input_shapes: whether to record shapes of op's inputs. 21 * @param with_stack: whether to record model's python stacktrace for the op. 22 * @param with_flops: whether to report flops corresponding to the op. 23 * @param with_modules: whether to report original python module 24 * hierarchy to which the op belongs. 25 * @param events 26 * @param adjust_vulkan_timestamps: whether to adjust vulkan timestamps from 27 * query pool to align with cpu event times 28 * 29 * Usage pattern for this profiler must be as follows: 30 * 31 * { 32 * KinetoEdgeCPUProfiler(m, filename, args); 33 * m.forward(...); 34 * } 35 * 36 * The reason being that KinetoEdgeCPUProfiler has a dependency on Module 37 * and thus it must not outlive it. 38 * 39 * Thus, when KinetoEdgeCPUProfiler is used as RAII to do profiling 40 * within certain scope. In that scope, the captured reference to 41 * Module will outlive KinetoEdgeCPUProfiler. This is gauranteed because 42 * KinetoEdgeCPUProfiler must be constructed later than Module, on stack. 43 * 44 * An example of the anti-pattern and wrong usage is: 45 * 46 * std::shared_ptr<KinetoMobileCPUProfiler> profiler(m, filename, args); 47 * m.forward(...); 48 * 49 * Since KinetoEdgeCPUProfiler object would then be constructed on heap 50 * with its lifetime managed manually or via smart pointers. 51 */ 52 KinetoEdgeCPUProfiler( 53 const torch::jit::mobile::Module& m, 54 const std::string& fname, 55 const bool report_input_shapes = false, 56 const bool profile_memory = false, 57 const bool with_stack = false, 58 const bool with_flops = false, 59 const bool with_modules = false, 60 std::vector<std::string> events = {}, 61 const bool adjust_vulkan_timestamps = false); 62 63 const std::unique_ptr<torch::autograd::profiler::ProfilerResult>& 64 disableProfiler(); 65 const std::unique_ptr<torch::autograd::profiler::ProfilerResult>& 66 getProfilerResult(); 67 void recordBackendEvent( 68 const int64_t start_time_us, 69 const int64_t end_time_us, 70 const int64_t debug_handle, 71 const std::string& event_name, 72 const std::string& backend_name); 73 void recordBackendMemoryEvent( 74 void* ptr, 75 int64_t alloc_size, 76 size_t total_allocated, 77 size_t total_reserved, 78 c10::Device device); 79 80 ~KinetoEdgeCPUProfiler(); 81 82 private: 83 /* 84 * We store a reference to Module to make such dependency explicit, since 85 * a Module reference is already stored in a functor. 86 */ 87 const mobile::Module& m_; 88 std::string trace_file_name_; 89 std::unique_ptr<torch::autograd::profiler::ProfilerResult> profiler_result_; 90 }; 91 92 TORCH_API KinetoEdgeCPUProfiler* getCurrentEdgeProfiler(); 93 94 #define RECORD_BACKEND_EVENT_TO_EDGE_PROFILER( \ 95 start_time_us, end_time_us, debug_handle, event_name, backend_name) \ 96 if (mobile::getCurrentEdgeProfiler()) { \ 97 mobile::getCurrentEdgeProfiler()->recordBackendEvent( \ 98 start_time_us, end_time_us, debug_handle, event_name, backend_name); \ 99 } 100 101 #define RECORD_BACKEND_MEMORY_EVENT_TO_EDGE_PROFILER( \ 102 ptr, alloc_size, total_allocated, total_reserved, device) \ 103 if (mobile::getCurrentEdgeProfiler()) { \ 104 mobile::getCurrentEdgeProfiler()->recordBackendMemoryEvent( \ 105 ptr, alloc_size, total_allocated, total_reserved, device); \ 106 } 107 #else 108 109 #define RECORD_BACKEND_EVENT_TO_EDGE_PROFILER( \ 110 start_time_us, end_time_us, debug_handle, event_name, backend_name) 111 112 #define RECORD_BACKEND_MEMORY_EVENT_TO_EDGE_PROFILER( \ 113 ptr, alloc_size, total_allocated, total_reserved, device) 114 #endif 115 } // namespace torch::jit::mobile 116