# -*- coding: utf-8 -*-
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Using the ExecuTorch Developer Tools to Profile a Model
========================================================

**Author:** `Jack Khuu <https://github.com/Jack-Khuu>`__
"""

######################################################################
# The `ExecuTorch Developer Tools <../devtools-overview.html>`__ are a set of tools designed to
# give users the ability to profile, debug, and visualize ExecuTorch
# models.
#
# This tutorial shows a full end-to-end flow of using the Developer Tools to profile a model.
# Specifically, it will:
#
# 1. Generate the artifacts consumed by the Developer Tools (`ETRecord <../etrecord.html>`__, `ETDump <../etdump.html>`__).
# 2. Create an ``Inspector`` instance that consumes these artifacts.
# 3. Use the ``Inspector`` to analyze the model's profiling results.
######################################################################
# Prerequisites
# -------------
#
# To run this tutorial, you’ll first need to
# `Set up your ExecuTorch environment <../getting-started-setup.html>`__.
#

######################################################################
# Generate ETRecord (Optional)
# ----------------------------
#
# The first step is to generate an ``ETRecord``. ``ETRecord`` contains model
# graphs and metadata for linking runtime results (such as profiling) to
# the eager model. This is generated via ``executorch.devtools.generate_etrecord``.
#
# ``executorch.devtools.generate_etrecord`` takes in an output file path (str), the
# edge dialect model (``EdgeProgramManager``), the ExecuTorch dialect model
# (``ExecutorchProgramManager``), and an optional dictionary of additional models.
#
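# For illustration only, a call that also records an extra exported program
# (the argument names below match the code later in this tutorial; the trailing
# dictionary and its key are just an example) might look like::
#
#       generate_etrecord(
#           "etrecord.bin",                # output path
#           edge_program_manager_copy,     # deep copy of the edge dialect program
#           et_program_manager,            # ExecuTorch dialect program
#           {"aten_dialect": aten_model},  # optional: extra models to record
#       )
#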
# In this tutorial, an example model (shown below) is used to demonstrate this flow.

import copy

import torch
import torch.nn as nn
import torch.nn.functional as F
from executorch.devtools import generate_etrecord

from executorch.exir import (
    EdgeCompileConfig,
    EdgeProgramManager,
    ExecutorchProgramManager,
    to_edge,
)
from torch.export import export, ExportedProgram


# Generate Model
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # 5*5 from image dimension
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is a square, you can specify with a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = torch.flatten(x, 1)  # flatten all dimensions except the batch dimension
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


model = Net()

aten_model: ExportedProgram = export(
    model,
    (torch.randn(1, 1, 32, 32),),
)

edge_program_manager: EdgeProgramManager = to_edge(
    aten_model, compile_config=EdgeCompileConfig(_check_ir_validity=True)
)
edge_program_manager_copy = copy.deepcopy(edge_program_manager)
et_program_manager: ExecutorchProgramManager = edge_program_manager.to_executorch()


# Generate ETRecord
etrecord_path = "etrecord.bin"
generate_etrecord(etrecord_path, edge_program_manager_copy, et_program_manager)

# sphinx_gallery_start_ignore
from unittest.mock import patch

# sphinx_gallery_end_ignore

######################################################################
#
# .. warning::
#    Users should do a deepcopy of the output of ``to_edge()`` and pass in the
#    deepcopy to the ``generate_etrecord`` API. This is needed because the
#    subsequent call, ``to_executorch()``, does an in-place mutation and will
#    lose debug data in the process.
#
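# In short, the intended ordering is (a condensed sketch of the code above)::
#
#       edge_program_manager = to_edge(aten_model)
#       edge_program_manager_copy = copy.deepcopy(edge_program_manager)  # snapshot before mutation
#       et_program_manager = edge_program_manager.to_executorch()        # mutates the manager in place
#       generate_etrecord("etrecord.bin", edge_program_manager_copy, et_program_manager)
#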

######################################################################
# Generate ETDump
# ---------------
#
# The next step is to generate an ``ETDump``. ``ETDump`` contains the runtime results
# from executing a `Bundled Program Model <../bundled-io.html>`__.
#
# In this tutorial, a `Bundled Program` is created from the example model above.

import torch
from executorch.devtools import BundledProgram

from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite
from executorch.devtools.bundled_program.serialize import (
    serialize_from_bundled_program_to_flatbuffer,
)

from executorch.exir import to_edge
from torch.export import export

# Step 1: ExecuTorch Program Export
m_name = "forward"
method_graphs = {m_name: export(model, (torch.randn(1, 1, 32, 32),))}

# Step 2: Construct Method Test Suites
inputs = [[torch.randn(1, 1, 32, 32)] for _ in range(2)]

method_test_suites = [
    MethodTestSuite(
        method_name=m_name,
        test_cases=[
            MethodTestCase(inputs=inp, expected_outputs=getattr(model, m_name)(*inp))
            for inp in inputs
        ],
    )
]

# Step 3: Generate BundledProgram
executorch_program = to_edge(method_graphs).to_executorch()
bundled_program = BundledProgram(executorch_program, method_test_suites)

# Step 4: Serialize BundledProgram to flatbuffer.
serialized_bundled_program = serialize_from_bundled_program_to_flatbuffer(
    bundled_program
)
save_path = "bundled_program.bp"
with open(save_path, "wb") as f:
    f.write(serialized_bundled_program)

######################################################################
# Use CMake (follow `these instructions <../runtime-build-and-cross-compilation.html#configure-the-cmake-build>`__ to set up cmake) to execute the Bundled Program and generate the ``ETDump``::
#
#       cd executorch
#       ./examples/devtools/build_example_runner.sh
#       cmake-out/examples/devtools/example_runner --bundled_program_path="bundled_program.bp"
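#
# Running the example runner produces an ``ETDump`` file on disk. The exact
# output location and flag name depend on the runner build; as a sketch,
# assuming the runner accepts an ``--etdump_path`` flag (check
# ``example_runner --help``), the dump used in the next section could be
# written as::
#
#       cmake-out/examples/devtools/example_runner \
#           --bundled_program_path="bundled_program.bp" \
#           --etdump_path="etdump.etdp"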

######################################################################
# Creating an Inspector
# ---------------------
#
# The final step is to create the ``Inspector`` by passing in the artifact paths.
# The Inspector takes the runtime results from the ``ETDump`` and correlates them to
# the operators of the Edge Dialect Graph.
#
# Recall: an ``ETRecord`` is not required. If an ``ETRecord`` is not provided,
# the Inspector will show runtime results without operator correlation.
#
# To visualize all runtime events, call the Inspector's ``print_data_tabular``.

from executorch.devtools import Inspector

# sphinx_gallery_start_ignore
inspector_patch = patch.object(Inspector, "__init__", return_value=None)
inspector_patch_print = patch.object(Inspector, "print_data_tabular", return_value="")
inspector_patch.start()
inspector_patch_print.start()
# sphinx_gallery_end_ignore
etrecord_path = "etrecord.bin"
etdump_path = "etdump.etdp"
inspector = Inspector(etdump_path=etdump_path, etrecord=etrecord_path)
# sphinx_gallery_start_ignore
inspector.event_blocks = []
# sphinx_gallery_end_ignore
inspector.print_data_tabular()

# sphinx_gallery_start_ignore
inspector_patch.stop()
inspector_patch_print.stop()
# sphinx_gallery_end_ignore

######################################################################
# Analyzing with an Inspector
# ---------------------------
#
# ``Inspector`` provides two ways of accessing the ingested information: `EventBlocks <../model-inspector.html#eventblock-class>`__
# and ``DataFrames``. These give users the ability to perform custom
# analysis of their model's performance.
#
# Below are example usages, with both ``EventBlock`` and ``DataFrame`` approaches.
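#
# For instance, a quick way to rank operators by their total median (p50) runtime
# via the ``DataFrame`` route might look like the sketch below (it assumes the
# ``event_name`` and ``p50`` columns used in the examples that follow)::
#
#       for event_block in inspector.event_blocks:
#           df = event_block.to_dataframe()
#           print(
#               df.groupby("event_name")["p50"].sum().sort_values(ascending=False).head(10)
#           )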

# Set Up
import pprint as pp

import pandas as pd

pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_columns", None)

######################################################################
# If a user wants the raw profiling results, they can, for example, look up
# the raw runtime data of an ``addmm.out`` event, as shown below.

for event_block in inspector.event_blocks:
    # Via EventBlocks
    for event in event_block.events:
        if event.name == "native_call_addmm.out":
            print(event.name, event.perf_data.raw)

    # Via Dataframe
    df = event_block.to_dataframe()
    df = df[df.event_name == "native_call_addmm.out"]
    print(df[["event_name", "raw"]])
    print()

######################################################################
# If a user wants to trace an operator back to their model code, they can, for
# example, find the module hierarchy and stack trace of the slowest
# ``convolution.out`` call, as shown below.

for event_block in inspector.event_blocks:
    # Via EventBlocks
    slowest = None
    for event in event_block.events:
        if event.name == "native_call_convolution.out":
            if slowest is None or event.perf_data.p50 > slowest.perf_data.p50:
                slowest = event
    if slowest is not None:
        print(slowest.name)
        print()
        pp.pprint(slowest.stack_traces)
        print()
        pp.pprint(slowest.module_hierarchy)

    # Via Dataframe
    df = event_block.to_dataframe()
    df = df[df.event_name == "native_call_convolution.out"]
    if len(df) > 0:
        slowest = df.loc[df["p50"].idxmax()]
        print(slowest.event_name)
        print()
        pp.pprint(slowest.stack_traces)
        print()
        pp.pprint(slowest.module_hierarchy)

######################################################################
# If a user wants the total runtime of a module, they can use
# ``find_total_for_module``.

print(inspector.find_total_for_module("L__self__"))
print(inspector.find_total_for_module("L__self___conv2"))

######################################################################
# Note: ``find_total_for_module`` is a special first-class method of
# `Inspector <../model-inspector.html>`__.

######################################################################
# Conclusion
# ----------
#
# In this tutorial, we walked through the steps required to consume an ExecuTorch
# model with the ExecuTorch Developer Tools and showed how to use the Inspector APIs
# to analyze the model run results.
#
# Links Mentioned
# ^^^^^^^^^^^^^^^
#
# - `ExecuTorch Developer Tools Overview <../devtools-overview.html>`__
# - `ETRecord <../etrecord.html>`__
# - `ETDump <../etdump.html>`__
# - `Inspector <../model-inspector.html>`__