xref: /aosp_15_r20/external/executorch/profiler/parse_profiler_results.py (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
1*523fa7a6SAndroid Build Coastguard Worker# Copyright (c) Meta Platforms, Inc. and affiliates.
2*523fa7a6SAndroid Build Coastguard Worker# All rights reserved.
3*523fa7a6SAndroid Build Coastguard Worker#
4*523fa7a6SAndroid Build Coastguard Worker# This source code is licensed under the BSD-style license found in the
5*523fa7a6SAndroid Build Coastguard Worker# LICENSE file in the root directory of this source tree.
6*523fa7a6SAndroid Build Coastguard Worker
import dataclasses
import struct
from collections import OrderedDict
from enum import Enum
from typing import Dict, List, Optional, Tuple

from prettytable import PrettyTable
15*523fa7a6SAndroid Build Coastguard Worker
# Version of the profiling dump format understood by this tool. This number
# should match the one defined in profiler.h; deserialize_profile_results()
# refuses any block whose header carries a different version.
ET_PROF_VER = 0x00000001

# The struct-module format strings below describe the binary layout of the
# records found in a profiling dump (for example, PROF_RESULT_STRUCT_FMT mirrors
# the prof_result_t struct defined in executorch/profiler/profiler.h). They are
# used to unpack the binary data to derive the profiling results.
# None of them starts with a byte-order character, so the struct module applies
# native byte order, sizes and alignment.
# To align the end of a structure to the alignment requirement of a particular
# type, end the format with the code for that type with a repeat count of zero.
# Adding 0Q at the end ensures that the struct is aligned to 8 bytes, which is
# the alignment we impose in the runtime.
# Block header: 32-byte name followed by seven unsigned ints (see ProfilerHeader).
PROF_HEADER_STRUCT_FMT = "32s7I0Q"
# Perf event: 32-byte name, one signed int, one unsigned int and two unsigned
# long longs (see ProfileData).
PROF_RESULT_STRUCT_FMT = "32siIQQ0Q"
# Allocator table entry: 32-byte name followed by one unsigned long long id.
ALLOCATOR_STRUCT_FMT = "32sQ0Q"
# Memory allocation event: two unsigned ints (see MemAllocation).
ALLOCATION_STRUCT_FMT = "2I0Q"
# NOTE(review): presumably the chain_idx recorded for events that are not tied
# to a chain; it is not referenced in the part of the file reviewed here, so
# confirm against profiler.h.
CHAIN_IDX_NO_CHAIN = -1
32*523fa7a6SAndroid Build Coastguard Worker
33*523fa7a6SAndroid Build Coastguard Worker
class TimeScale(Enum):
    """Unit in which the raw timestamps of a profiling dump are expressed.

    adjust_time_scale() uses the member to choose the divisor applied to the
    raw start time and duration of an event.
    """

    # Raw values are divided by 1000000 by adjust_time_scale().
    TIME_IN_NS = 0
    # Raw values are divided by 1000 by adjust_time_scale().
    TIME_IN_US = 1
    # Raw values are passed through unchanged by adjust_time_scale().
    TIME_IN_MS = 2
    # Raw values are passed through unchanged by adjust_time_scale().
    CPU_CYCLES = 3
39*523fa7a6SAndroid Build Coastguard Worker
40*523fa7a6SAndroid Build Coastguard Worker
# These data classes represent the structures used on device to
# log various forms of profiling data.
@dataclasses.dataclass
class ProfileData:
    """One raw perf event, unpacked with PROF_RESULT_STRUCT_FMT.

    deserialize_profile_results() fills the fields positionally from the
    unpacked record, in the order they are declared here.
    """

    # Event name, decoded from the 32-byte name field with NUL bytes removed.
    name: str
    # Signed int field of the record.
    chain_idx: int
    # Unsigned int field of the record.
    instruction_idx: int
    # Raw start and end timestamps (unsigned 64-bit); their unit is whatever
    # TimeScale the caller passes to deserialize_profile_results().
    start_time: int
    end_time: int
50*523fa7a6SAndroid Build Coastguard Worker
51*523fa7a6SAndroid Build Coastguard Worker
@dataclasses.dataclass
class ProfilerHeader:
    """Header of one profiling block, unpacked with PROF_HEADER_STRUCT_FMT.

    deserialize_profile_results() builds it from the decoded block name
    followed by the seven integers of the header, in field order.
    """

    # Name of the profiling block; blocks sharing a name are grouped together
    # and treated as iterations of the same code segment.
    name: str
    # Dump format version; compared against ET_PROF_VER.
    prof_ver: int
    # Number of perf-event slots the block occupies in the dump; used to skip
    # past the perf-event region.
    max_prof_entries: int
    # Number of perf-event records actually parsed from that region.
    prof_entries: int
    # Number of allocator-table slots the block occupies in the dump.
    max_allocator_entries: int
    # Number of allocator-table records actually parsed.
    allocator_entries: int
    # Number of memory-allocation slots the block occupies in the dump.
    max_mem_prof_entries: int
    # Number of memory-allocation records actually parsed.
    mem_prof_entries: int
62*523fa7a6SAndroid Build Coastguard Worker
63*523fa7a6SAndroid Build Coastguard Worker
@dataclasses.dataclass
class Allocator:
    """An allocator's name together with its numeric id.

    NOTE(review): this class is not instantiated in the part of the file
    reviewed here; deserialize_profile_results() keeps the allocator table as a
    plain dict mapping id to name instead.
    """

    # Name designated to the allocator.
    name: str
    # Identifier that memory allocation events use to refer to the allocator.
    allocator_id: int
68*523fa7a6SAndroid Build Coastguard Worker
69*523fa7a6SAndroid Build Coastguard Worker
@dataclasses.dataclass
class MemAllocation:
    """One raw memory allocation event, unpacked with ALLOCATION_STRUCT_FMT."""

    # Id of the allocator the memory came from; parse_prof_blocks() resolves it
    # to a name through the allocator table.
    allocator_id: int
    # Size of the allocation; parse_prof_blocks() sums these per allocator.
    # NOTE(review): presumably bytes - the unit is not stated in this file.
    allocation_size: int
74*523fa7a6SAndroid Build Coastguard Worker
75*523fa7a6SAndroid Build Coastguard Worker
76*523fa7a6SAndroid Build Coastguard Worker"""
77*523fa7a6SAndroid Build Coastguard WorkerThese data classes are derived from the post-processing of the
78*523fa7a6SAndroid Build Coastguard Workerprofiling data retrieved from the runtime. If there are multiple
79*523fa7a6SAndroid Build Coastguard Workerblocks of profiling data resulting from iterations of the same code
80*523fa7a6SAndroid Build Coastguard Workersegment then corresponding entries will be consolidated with each
81*523fa7a6SAndroid Build Coastguard Workerentry in the consolidated list representing one iteration.
82*523fa7a6SAndroid Build Coastguard Worker"""
83*523fa7a6SAndroid Build Coastguard Worker
84*523fa7a6SAndroid Build Coastguard Worker
@dataclasses.dataclass
class ProfileEvent:
    """A perf event consolidated across all iterations of a profiling block.

    parse_prof_blocks() creates one instance per event position and appends
    one entry to ``ts`` and ``duration`` for every iteration of the block.
    """

    # Name of the event.
    name: str
    # Start time of the event in each iteration, after time-scale adjustment.
    ts: List[float]
    # Duration of the event in each iteration, after time-scale adjustment.
    duration: List[float]
    # Chain and instruction index copied from the raw event; -1 when not given.
    chain_idx: int = -1
    instruction_idx: int = -1
    # Optional stack trace text; None when no stack trace is attached.
    stacktrace: Optional[str] = None
94*523fa7a6SAndroid Build Coastguard Worker
95*523fa7a6SAndroid Build Coastguard Worker
@dataclasses.dataclass
class ProfileEventFrameworkTax:
    """Per-iteration framework overhead figures for one profiling block.

    Instances are produced by profile_aggregate_framework_tax(); every list
    holds one value per iteration of the block.
    """

    # Duration list of the "Method::execute" event selected for the block.
    # NOTE(review): the values come from ProfileEvent.duration, which is
    # annotated List[float]; confirm whether List[int] is still accurate.
    exec_time: List[int]
    # Sum of the durations of all "native_call" and "delegate_execute" events.
    kernel_and_delegate_time: List[int]
    # Share of exec_time not spent in kernels or delegates, as a percentage.
    framework_tax: List[float]
101*523fa7a6SAndroid Build Coastguard Worker
102*523fa7a6SAndroid Build Coastguard Worker
@dataclasses.dataclass
class MemEvent:
    """Total memory allocated from one allocator within a profiling block.

    Instances are produced by parse_prof_blocks().
    """

    # Name of the allocator, looked up from the allocator table by id.
    allocator_name: str
    # Sum of the allocation sizes recorded for this allocator.
    total_allocations_done: int
107*523fa7a6SAndroid Build Coastguard Worker
108*523fa7a6SAndroid Build Coastguard Worker
def adjust_time_scale(event: ProfileData, time_scale: TimeScale):
    """Return the start time and duration of ``event`` for reporting.

    Args:
        event: Raw event providing ``start_time`` and ``end_time``.
        time_scale: Unit the raw timestamps are expressed in.

    Returns:
        A ``(start_time, duration)`` tuple. For TIME_IN_NS and TIME_IN_US both
        values are divided by 1000000 and 1000 respectively and rounded to four
        decimal places; for TIME_IN_MS and CPU_CYCLES the raw integers are
        returned unchanged.

    Raises:
        KeyError: If ``time_scale`` is not one of the four TimeScale members.
    """
    divisor_by_scale = {
        TimeScale.CPU_CYCLES: 1,
        TimeScale.TIME_IN_MS: 1,
        TimeScale.TIME_IN_US: 1000,
        TimeScale.TIME_IN_NS: 1000000,
    }
    divisor = divisor_by_scale[time_scale]
    elapsed = event.end_time - event.start_time

    # A divisor of 1 means no conversion: keep the raw integers untouched
    # instead of turning them into floats.
    if divisor == 1:
        return event.start_time, elapsed

    return round(event.start_time / divisor, 4), round(elapsed / divisor, 4)
124*523fa7a6SAndroid Build Coastguard Worker
125*523fa7a6SAndroid Build Coastguard Worker
def parse_prof_blocks(
    prof_blocks: Dict[str, List[Tuple[List[ProfileData], List[MemAllocation]]]],
    allocator_dict: Dict[int, str],
    time_scale: TimeScale,
) -> Tuple[Dict[str, List[ProfileEvent]], Dict[str, List[MemEvent]]]:
    """Consolidate the raw per-iteration records of every profiling block.

    Args:
        prof_blocks: Maps a block name to its iterations; each iteration is a
            tuple of (perf events, memory allocation events).
        allocator_dict: Maps an allocator id to the allocator's name.
        time_scale: Unit of the raw timestamps, forwarded to
            adjust_time_scale().

    Returns:
        A tuple of two ordered dicts keyed by block name. The first maps to the
        consolidated ProfileEvent list (one entry per event position, holding
        one start time and duration per iteration). The second maps to one
        MemEvent per allocator with the summed allocation sizes.

    Raises:
        KeyError: If an allocation refers to an allocator id that is missing
            from ``allocator_dict``.
    """
    events_by_block = OrderedDict()
    mem_by_block = OrderedDict()

    # Every value in prof_blocks lists the iterations recorded under one name.
    for block_name, iterations in prof_blocks.items():
        merged_events = []
        allocations = []

        for iteration in iterations:
            # iteration[0] holds the perf events of this iteration.
            for position, raw_event in enumerate(iteration[0]):
                start_time, duration = adjust_time_scale(raw_event, time_scale)
                if position < len(merged_events):
                    # Event position already known: record one more iteration.
                    merged = merged_events[position]
                    merged.ts.append(start_time)
                    merged.duration.append(duration)
                    continue
                merged_events.append(
                    ProfileEvent(
                        raw_event.name,
                        [start_time],
                        [duration],
                        raw_event.chain_idx,
                        raw_event.instruction_idx,
                    )
                )

            # iteration[1] holds the memory allocation events. Only positions
            # not collected yet are added, so iterations are not double counted.
            allocations.extend(iteration[1][len(allocations) :])

        # Add up the allocation sizes per allocator, keeping first-seen order.
        size_by_allocator = OrderedDict()
        for allocation in allocations:
            size_by_allocator[allocation.allocator_id] = (
                size_by_allocator.get(allocation.allocator_id, 0)
                + allocation.allocation_size
            )

        mem_summary = [
            MemEvent(allocator_dict[allocator_id], total_size)
            for allocator_id, total_size in size_by_allocator.items()
        ]
        events_by_block[block_name] = merged_events
        mem_by_block[block_name] = mem_summary

    return events_by_block, mem_by_block
186*523fa7a6SAndroid Build Coastguard Worker
187*523fa7a6SAndroid Build Coastguard Worker
def sanity_check_prof_outputs(
    prof_blocks: Dict[str, List[Tuple[List[ProfileData], List[MemAllocation]]]]
):
    """Check that consecutive iterations of every profiling block line up.

    Each iteration recorded under a block name is compared with the one that
    follows it: both must hold the same number of perf events with matching
    names, and the same number of memory allocation events with matching
    allocator ids and allocation sizes.

    Args:
        prof_blocks: Maps a block name to its iterations; each iteration is a
            tuple of (perf events, memory allocation events).

    Raises:
        ValueError: On the first mismatch found between two consecutive
            iterations.
    """
    for iterations in prof_blocks.values():
        # Pair every iteration with its successor.
        for current, following in zip(iterations, iterations[1:]):
            events_base, events_cmp = current[0], following[0]

            # Iterations of the same code block must have recorded the same
            # number of perf events.
            if len(events_base) != len(events_cmp):
                raise ValueError(
                    "Profiling blocks corresponding to the same name shouldn't be of different lengths."
                )

            for event_base, event_cmp in zip(events_base, events_cmp):
                if event_base.name != event_cmp.name:
                    raise ValueError(
                        "Corresponding entries in different iterations of the "
                        "profiling block do not match"
                    )

            allocs_base, allocs_cmp = current[1], following[1]

            if len(allocs_base) != len(allocs_cmp):
                raise ValueError(
                    "Memory profiling blocks corresponding to the same name shouldn't be of different lengths."
                )

            for alloc_base, alloc_cmp in zip(allocs_base, allocs_cmp):
                if alloc_base.allocator_id != alloc_cmp.allocator_id:
                    raise ValueError(
                        "Corresponding entries in different iterations of the memory "
                        "profiling blocks do not have the same allocator id"
                    )
                if alloc_base.allocation_size != alloc_cmp.allocation_size:
                    raise ValueError(
                        "Corresponding entries in different iterations of the memory "
                        "profiling blocks do not have the same allocation size."
                    )
236*523fa7a6SAndroid Build Coastguard Worker
237*523fa7a6SAndroid Build Coastguard Worker
def deserialize_profile_results(
    buff: bytes, time_scale: TimeScale = TimeScale.TIME_IN_NS
) -> Tuple[Dict[str, List[ProfileEvent]], Dict[str, List[MemEvent]]]:
    """Parse a binary profiling dump into consolidated profiling results.

    The buffer is read as a sequence of profiling blocks. Each block consists
    of a header, ``max_prof_entries`` perf-event slots, ``max_allocator_entries``
    allocator-table slots and ``max_mem_prof_entries`` memory-allocation slots.
    Only the first ``prof_entries`` / ``allocator_entries`` /
    ``mem_prof_entries`` slots of each region are parsed. Blocks sharing a name
    are treated as iterations of the same code segment. The allocator table is
    accumulated across all blocks of the buffer.

    Args:
        buff: Raw bytes of the profiling dump.
        time_scale: Unit of the timestamps stored in the dump.

    Returns:
        The tuple produced by parse_prof_blocks(): consolidated perf events and
        per-allocator memory totals, both keyed by block name.

    Raises:
        AssertionError: If a block header carries a version other than
            ET_PROF_VER.
        struct.error: If the buffer ends before a record that is being read.
        ValueError: If iterations of the same block do not match (raised by
            sanity_check_prof_outputs()).
    """

    def decode_name(raw: bytes) -> str:
        # Names are fixed 32-byte fields; when the real name is shorter the
        # rest is filled with NUL characters, which are removed here.
        return raw.decode("utf-8").replace("\u0000", "")

    # Compile each format once; .size replaces the separate calcsize() calls.
    header_struct = struct.Struct(PROF_HEADER_STRUCT_FMT)
    result_struct = struct.Struct(PROF_RESULT_STRUCT_FMT)
    allocator_struct = struct.Struct(ALLOCATOR_STRUCT_FMT)
    allocation_struct = struct.Struct(ALLOCATION_STRUCT_FMT)

    prof_blocks = OrderedDict()
    allocator_dict = {}
    base_offset = 0

    while base_offset < len(buff):
        # Unpack the header for this profiling block from which we can figure
        # out how many profiling entries are present in this block.
        header_name, *header_counts = header_struct.unpack_from(buff, base_offset)
        prof_header = ProfilerHeader(decode_name(header_name), *header_counts)
        base_offset += header_struct.size

        # Raised explicitly (instead of using an assert statement) so that the
        # check still runs under "python -O"; the exception type is unchanged.
        if prof_header.prof_ver != ET_PROF_VER:
            raise AssertionError(
                "Mismatch in version between profile dump and post-processing tool"
            )

        # Get all the profiling (perf events) entries.
        prof_data = []
        for i in range(prof_header.prof_entries):
            (
                name_bytes,
                chain_idx,
                instruction_idx,
                start_time,
                end_time,
            ) = result_struct.unpack_from(buff, base_offset + i * result_struct.size)
            prof_data.append(
                ProfileData(
                    decode_name(name_bytes),
                    chain_idx,
                    instruction_idx,
                    start_time,
                    end_time,
                )
            )

        # Skip the whole perf-event region (used and unused slots) to reach the
        # allocator table.
        base_offset += result_struct.size * prof_header.max_prof_entries

        # Parse the allocator entries table; it maps the allocator id to the
        # string containing the name designated to this allocator.
        for i in range(prof_header.allocator_entries):
            allocator_name, allocator_id = allocator_struct.unpack_from(
                buff, base_offset + i * allocator_struct.size
            )
            allocator_dict[allocator_id] = decode_name(allocator_name)

        base_offset += allocator_struct.size * prof_header.max_allocator_entries

        # Get all the profiling (memory allocation events) entries.
        mem_prof_data = [
            MemAllocation(
                *allocation_struct.unpack_from(
                    buff, base_offset + i * allocation_struct.size
                )
            )
            for i in range(prof_header.mem_prof_entries)
        ]

        base_offset += allocation_struct.size * prof_header.max_mem_prof_entries

        # Append the data just parsed to the list of iterations recorded under
        # this block name (in place, without copying the list each time).
        prof_blocks.setdefault(prof_header.name, []).append((prof_data, mem_prof_data))

    sanity_check_prof_outputs(prof_blocks)
    return parse_prof_blocks(prof_blocks, allocator_dict, time_scale)
323*523fa7a6SAndroid Build Coastguard Worker
324*523fa7a6SAndroid Build Coastguard Worker
def profile_table(
    profile_data: Dict[str, List[ProfileEvent]], model_buffer=None
) -> List[PrettyTable]:
    """Build one table of per-iteration durations for every profiling block.

    Each row holds an event's name, chain index, instruction index, a "Frame"
    cell (always None here) and one duration per iteration.

    Args:
        profile_data: Maps a block name to its consolidated events.
        model_buffer: Accepted for backward compatibility; not used by this
            function.

    Returns:
        One PrettyTable per block, titled with the block name, in the
        iteration order of ``profile_data``.
    """
    results = []

    for name, prof_entries_list in profile_data.items():
        # Size the iteration columns from this block alone. Carrying the count
        # over from earlier blocks would give a block with fewer iterations
        # more column names than row values, which PrettyTable rejects. A block
        # without events yields a table that only has the four fixed columns.
        num_iterations = (
            len(prof_entries_list[0].duration) if prof_entries_list else 0
        )

        table = PrettyTable()
        table.title = name
        table.add_rows(
            [
                (
                    entry.name,
                    entry.chain_idx,
                    entry.instruction_idx,
                    None,
                )
                + tuple(entry.duration)
                for entry in prof_entries_list
            ]
        )
        table.field_names = [
            "Name",
            "Chain",
            "Instr",
            "Frame",
        ] + [f"Iteration {i}" for i in range(num_iterations)]
        results.append(table)
    return results
356*523fa7a6SAndroid Build Coastguard Worker
357*523fa7a6SAndroid Build Coastguard Worker
def mem_profile_table(mem_allocations: Dict[str, List[MemEvent]]) -> List[PrettyTable]:
    """Build one table of per-allocator memory totals for every profiling block.

    Args:
        mem_allocations: Maps a block name to its MemEvent list.

    Returns:
        One PrettyTable per block, titled with the block name, with a row per
        allocator showing its name and the total size of allocations done.
    """
    rendered = []
    for block_name, mem_events in mem_allocations.items():
        summary = PrettyTable()
        summary.title = block_name
        summary.add_rows(
            [
                (mem_event.allocator_name, mem_event.total_allocations_done)
                for mem_event in mem_events
            ]
        )
        summary.field_names = ["Allocator name", "Total size of allocations done"]
        rendered.append(summary)
    return rendered
370*523fa7a6SAndroid Build Coastguard Worker
371*523fa7a6SAndroid Build Coastguard Worker
def profile_aggregate_framework_tax(
    prof_data: Dict[str, List[ProfileEvent]]
) -> Dict[str, ProfileEventFrameworkTax]:
    """Compute the per-iteration framework overhead of every profiling block.

    For each block the duration list of the "Method::execute" event is compared
    with the summed durations of all "native_call" and "delegate_execute"
    events. The framework tax of an iteration is the share of the execute time
    not spent in those events, expressed as a percentage.

    Args:
        prof_data: Maps a block name to its consolidated events.

    Returns:
        An ordered dict mapping block name to ProfileEventFrameworkTax. Blocks
        without a "Method::execute" event or without any kernel/delegate event
        are left out.
    """
    prof_framework_tax = OrderedDict()

    for name, prof_data_list in prof_data.items():
        execute_max = []
        kernel_and_delegate_sum = []

        for d in prof_data_list:
            if "Method::execute" in d.name:
                # NOTE(review): max() on two lists compares them
                # lexicographically, so with several matching events the list
                # that is larger at the first differing iteration wins as a
                # whole - confirm this is intended rather than a per-iteration
                # maximum.
                execute_max = max(execute_max, d.duration)

            if "native_call" in d.name or "delegate_execute" in d.name:
                # Accumulate the durations iteration by iteration.
                for idx, duration in enumerate(d.duration):
                    if idx < len(kernel_and_delegate_sum):
                        kernel_and_delegate_sum[idx] += duration
                    else:
                        kernel_and_delegate_sum.append(duration)

        if not execute_max or not kernel_and_delegate_sum:
            continue

        # An execute time of 0 (possible with coarse time scales) makes the
        # ratio undefined; report 0.0 for that iteration instead of failing
        # with ZeroDivisionError.
        framework_tax_list = [
            (
                round((execute_time - kernel_delegate_call) / execute_time, 4) * 100
                if execute_time
                else 0.0
            )
            for execute_time, kernel_delegate_call in zip(
                execute_max, kernel_and_delegate_sum
            )
        ]

        prof_framework_tax[name] = ProfileEventFrameworkTax(
            execute_max, kernel_and_delegate_sum, framework_tax_list
        )

    return prof_framework_tax
407*523fa7a6SAndroid Build Coastguard Worker
408*523fa7a6SAndroid Build Coastguard Worker
def profile_framework_tax_table(
    prof_framework_tax_data: Dict[str, ProfileEventFrameworkTax]
) -> List[PrettyTable]:
    """Render the framework tax data as a list of PrettyTable objects.

    One table is produced per entry in ``prof_framework_tax_data``. Each table
    has three rows (model execution time, time spent in kernels and delegates,
    and framework tax percentage) and one column per element of the entry's
    ``exec_time`` list, labelled "Iteration <i>".

    Args:
        prof_framework_tax_data: Mapping from a name (used in the table title)
            to the ProfileEventFrameworkTax holding the per-iteration values.

    Returns:
        A list with one table for every key of the input mapping, in the
        mapping's iteration order. Empty if the mapping is empty.
    """
    # The accumulator must be created exactly once, outside the loop;
    # re-creating it per iteration would drop every table except the last.
    tables = []
    for name, tax_data in prof_framework_tax_data.items():
        table_agg = PrettyTable()
        table_agg.title = name + " framework tax calculations"

        table_agg.add_rows(
            [
                ("Model execution time", *tax_data.exec_time),
                (
                    "Time spent in kernels and delegates",
                    *tax_data.kernel_and_delegate_time,
                ),
                ("Framework tax (%)", *tax_data.framework_tax),
            ]
        )
        # First column holds the row label, hence the empty header cell.
        table_agg.field_names = [""] + [
            "Iteration " + str(i) for i in range(len(tax_data.exec_time))
        ]
        tables.append(table_agg)
    return tables
433*523fa7a6SAndroid Build Coastguard Worker
434*523fa7a6SAndroid Build Coastguard Worker
def deserialize_profile_results_files(
    profile_results_path: str,
    bundled_program_ff_path: str,
    time_scale: TimeScale = TimeScale.TIME_IN_NS,
):
    """Load profiling results from disk, print the report tables, return the data.

    Both files are read in binary mode. The profiling buffer is deserialized
    with the given ``time_scale``; the bundled program buffer is passed to
    ``profile_table`` together with the deserialized profiling data.

    Three groups of tables are printed to stdout, in this order: the profiling
    tables, the framework tax tables, and the memory allocation tables.

    Args:
        profile_results_path: Path of the binary profiling results file.
        bundled_program_ff_path: Path of the bundled program file.
        time_scale: Forwarded to ``deserialize_profile_results``.

    Returns:
        A ``(prof_data, mem_allocations)`` tuple as produced by
        ``deserialize_profile_results``.
    """
    # Open both inputs before reading either one, then read them in order.
    with open(profile_results_path, "rb") as results_fh:
        with open(bundled_program_ff_path, "rb") as program_fh:
            raw_results = results_fh.read()
            raw_program = program_fh.read()

    events, allocations = deserialize_profile_results(raw_results, time_scale)
    tax_by_name = profile_aggregate_framework_tax(events)

    # Per-event profiling tables.
    for rendered in profile_table(events, raw_program):
        print(rendered)

    # Framework tax summary tables.
    for rendered in profile_framework_tax_table(tax_by_name):
        print(rendered)

    # Memory allocation tables.
    for rendered in mem_profile_table(allocations):
        print(rendered)

    return events, allocations
462