# -*- coding: utf-8 -*-
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Using the ExecuTorch Developer Tools to Profile a Model
=======================================================

**Author:** `Jack Khuu <https://github.com/Jack-Khuu>`__
"""

######################################################################
# The `ExecuTorch Developer Tools <../devtools-overview.html>`__ is a set of tools designed to
# provide users with the ability to profile, debug, and visualize ExecuTorch
# models.
#
# This tutorial will show a full end-to-end flow of how to utilize the Developer Tools to profile a model.
# Specifically, it will:
#
# 1. Generate the artifacts consumed by the Developer Tools (`ETRecord <../etrecord.html>`__, `ETDump <../etdump.html>`__).
# 2. Create an Inspector class consuming these artifacts.
# 3. Utilize the Inspector class to analyze the model profiling result.

######################################################################
# Prerequisites
# -------------
#
# To run this tutorial, you’ll first need to
# `Set up your ExecuTorch environment <../getting-started-setup.html>`__.
#

######################################################################
# Generate ETRecord (Optional)
# ----------------------------
#
# The first step is to generate an ``ETRecord``. ``ETRecord`` contains model
# graphs and metadata for linking runtime results (such as profiling) to
# the eager model. This is generated via ``executorch.devtools.generate_etrecord``.
#
# ``executorch.devtools.generate_etrecord`` takes in an output file path (str), the
# edge dialect model (``EdgeProgramManager``), the ExecuTorch dialect model
# (``ExecutorchProgramManager``), and an optional dictionary of additional models.
#
# In this tutorial, an example model (shown below) is used to demonstrate.

import copy

import torch
import torch.nn as nn
import torch.nn.functional as F
from executorch.devtools import generate_etrecord

from executorch.exir import (
    EdgeCompileConfig,
    EdgeProgramManager,
    ExecutorchProgramManager,
    to_edge,
)
from torch.export import export, ExportedProgram


# Generate Model
class Net(nn.Module):
    """Small convolutional network used as the example model in this tutorial.

    Two 5x5 convolutions, each followed by ReLU and 2x2 max pooling, feed
    three fully connected layers that produce 10 values per sample.
    """

    def __init__(self) -> None:
        super().__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # 5*5 from image dimension
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the network on ``x`` and return the output of ``fc3``.

        The tutorial feeds inputs of shape ``(1, 1, 32, 32)``; ``fc1`` expects
        ``16 * 5 * 5`` features per sample after flattening.
        """
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is a square, you can specify with a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = torch.flatten(x, 1)  # flatten all dimensions except the batch dimension
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


model = Net()

aten_model: ExportedProgram = export(
    model,
    (torch.randn(1, 1, 32, 32),),
)

edge_program_manager: EdgeProgramManager = to_edge(
    aten_model, compile_config=EdgeCompileConfig(_check_ir_validity=True)
)
# Copy *before* calling ``to_executorch()``: the ETRecord below is generated
# from this copy rather than from the manager that ``to_executorch()`` is
# called on.
edge_program_manager_copy = copy.deepcopy(edge_program_manager)
et_program_manager: ExecutorchProgramManager = edge_program_manager.to_executorch()


# Generate ETRecord
etrecord_path = "etrecord.bin"
generate_etrecord(etrecord_path, edge_program_manager_copy, et_program_manager)

# sphinx_gallery_start_ignore
from unittest.mock import patch

# sphinx_gallery_end_ignore

######################################################################
#
# .. warning::
#    Users should do a deepcopy of the output of ``to_edge()`` and pass in the
#    deepcopy to the ``generate_etrecord`` API. This is needed because the
#    subsequent call, ``to_executorch()``, does an in-place mutation and will
#    lose debug data in the process.
#

######################################################################
# Generate ETDump
# ---------------
#
# The next step is to generate an ``ETDump``. ``ETDump`` contains runtime results
# from executing a `Bundled Program Model <../bundled-io.html>`__.
#
# In this tutorial, a ``Bundled Program`` is created from the example model above.

import torch
from executorch.devtools import BundledProgram

from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite
from executorch.devtools.bundled_program.serialize import (
    serialize_from_bundled_program_to_flatbuffer,
)

from executorch.exir import to_edge
from torch.export import export

# Step 1: ExecuTorch Program Export
m_name = "forward"
method_graphs = {m_name: export(model, (torch.randn(1, 1, 32, 32),))}

# Step 2: Construct Method Test Suites
inputs = [[torch.randn(1, 1, 32, 32)] for _ in range(2)]

method_test_suites = [
    MethodTestSuite(
        method_name=m_name,
        test_cases=[
            # The expected outputs come from running the eager model on the
            # same inputs that are bundled with the program.
            MethodTestCase(inputs=inp, expected_outputs=getattr(model, m_name)(*inp))
            for inp in inputs
        ],
    )
]

# Step 3: Generate BundledProgram
executorch_program = to_edge(method_graphs).to_executorch()
bundled_program = BundledProgram(executorch_program, method_test_suites)

# Step 4: Serialize BundledProgram to flatbuffer.
# Turn the in-memory BundledProgram into its flatbuffer form.
serialized_bundled_program = serialize_from_bundled_program_to_flatbuffer(bundled_program)

# Write the serialized program to disk so the example runner can load it.
save_path = "bundled_program.bp"
with open(save_path, "wb") as bundle_file:
    bundle_file.write(serialized_bundled_program)

######################################################################
# Use CMake (follow `these instructions <../runtime-build-and-cross-compilation.html#configure-the-cmake-build>`__ to set up cmake) to execute the Bundled Program to generate the ``ETDump``::
#
#       cd executorch
#       ./examples/devtools/build_example_runner.sh
#       cmake-out/examples/devtools/example_runner --bundled_program_path="bundled_program.bp"

######################################################################
# Creating an Inspector
# ---------------------
#
# The final step is to create the ``Inspector`` by passing in the artifact paths.
# Inspector takes the runtime results from ``ETDump`` and correlates them to
# the operators of the Edge Dialect Graph.
#
# Recall: An ``ETRecord`` is not required. If an ``ETRecord`` is not provided,
# the Inspector will show runtime results without operator correlation.
#
# To visualize all runtime events, call Inspector's ``print_data_tabular``.
from executorch.devtools import Inspector

# sphinx_gallery_start_ignore
# NOTE(review): Inspector.__init__ and print_data_tabular are mocked here,
# presumably because no real ETDump file exists when the docs are built (the
# ETDump comes from the external example runner) — confirm.
inspector_patch = patch.object(Inspector, "__init__", return_value=None)
inspector_patch_print = patch.object(Inspector, "print_data_tabular", return_value="")
inspector_patch.start()
inspector_patch_print.start()
# sphinx_gallery_end_ignore
etrecord_path = "etrecord.bin"
etdump_path = "etdump.etdp"
inspector = Inspector(etdump_path=etdump_path, etrecord=etrecord_path)
# sphinx_gallery_start_ignore
# The mocked __init__ sets no attributes, so provide an empty list for the
# loops below that iterate over ``inspector.event_blocks``.
inspector.event_blocks = []
# sphinx_gallery_end_ignore
inspector.print_data_tabular()

# sphinx_gallery_start_ignore
inspector_patch.stop()
inspector_patch_print.stop()
# sphinx_gallery_end_ignore

######################################################################
# Analyzing with an Inspector
# ---------------------------
#
# ``Inspector`` provides 2 ways of accessing ingested information: `EventBlocks <../model-inspector.html#eventblock-class>`__
# and ``DataFrames``. These mediums give users the ability to perform custom
# analysis about their model performance.
#
# Below are example usages, with both ``EventBlock`` and ``DataFrame`` approaches.

# Set Up
import pprint as pp

import pandas as pd

# Do not truncate column contents or hide columns when printing DataFrames.
pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_columns", None)

######################################################################
# If a user wants the raw profiling results, they would do something similar to
# finding the raw runtime data of an ``addmm.out`` event.
for event_block in inspector.event_blocks:
    # Via EventBlocks
    for event in event_block.events:
        if event.name == "native_call_addmm.out":
            print(event.name, event.perf_data.raw)

    # Via Dataframe
    df = event_block.to_dataframe()
    df = df[df.event_name == "native_call_addmm.out"]
    print(df[["event_name", "raw"]])
    print()

######################################################################
# If a user wants to trace an operator back to their model code, they would do
# something similar to finding the module hierarchy and stack trace of the
# slowest ``convolution.out`` call.

for event_block in inspector.event_blocks:
    # Via EventBlocks
    conv_events = [
        event
        for event in event_block.events
        if event.name == "native_call_convolution.out"
    ]
    # ``default=None`` covers blocks without any convolution event; on ties
    # ``max`` keeps the first event encountered.
    slowest_event = max(conv_events, key=lambda e: e.perf_data.p50, default=None)
    if slowest_event is not None:
        print(slowest_event.name)
        print()
        pp.pprint(slowest_event.stack_traces)
        print()
        pp.pprint(slowest_event.module_hierarchy)

    # Via Dataframe
    df = event_block.to_dataframe()
    df = df[df.event_name == "native_call_convolution.out"]
    if not df.empty:
        # Select the row with the largest p50 value.
        slowest_row = df.loc[df["p50"].idxmax()]
        print(slowest_row.event_name)
        print()
        pp.pprint(slowest_row.stack_traces)
        print()
        pp.pprint(slowest_row.module_hierarchy)

######################################################################
# If a user wants the total runtime of a module, they can use
# ``find_total_for_module``.
# Print the total for the whole model first, then for the ``conv2`` submodule.
for module_name in ("L__self__", "L__self___conv2"):
    print(inspector.find_total_for_module(module_name))

######################################################################
# Note: ``find_total_for_module`` is a special first class method of
# `Inspector <../model-inspector.html>`__.

######################################################################
# Conclusion
# ----------
#
# In this tutorial, we learned about the steps required to consume an ExecuTorch
# model with the ExecuTorch Developer Tools. It also showed how to use the Inspector APIs
# to analyze the model run results.
#
# Links Mentioned
# ^^^^^^^^^^^^^^^
#
# - `ExecuTorch Developer Tools Overview <../devtools-overview.html>`__
# - `ETRecord <../etrecord.html>`__
# - `ETDump <../etdump.html>`__
# - `Inspector <../model-inspector.html>`__