xref: /aosp_15_r20/external/toolchain-utils/cros_utils/perf_diff.py (revision 760c253c1ed00ce9abd48f8546f08516e57485fe)
1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3# Copyright 2019 The ChromiumOS Authors
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
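"""Compare perf report files side by side.

Parses the "# Events:" sections of one or more perf report text files and
prints tables comparing the per-section event counts and the counts of the
top functions across the given reports.
"""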
11
12
13__author__ = "[email protected] (Ahmad Sharif)"
14
15import argparse
16import functools
17import re
18import sys
19
20from cros_utils import misc
21from cros_utils import tabulator
22
23
# Key under which GetPerfDictFromReport stores, for each section, the number
# of functions whose percentage is greater than 1.
ROWS_TO_SHOW = "Rows_to_show_in_the_perf_table"
# Key under which GetPerfDictFromReport stores, for each section, the sum of
# the counts of all functions in that section.
TOTAL_EVENTS = "Total_events_of_this_profile"
26
27
def GetPerfDictFromReport(report_file):
    """Summarize a perf report file as nested dictionaries.

    Args:
        report_file: Path of the perf report, handed to PerfReport.

    Returns:
        A dict keyed by section name. Each value is a dict mapping every
        function name in that section to its count, plus two bookkeeping
        entries: ROWS_TO_SHOW (how many functions have a percentage above 1)
        and TOTAL_EVENTS (the sum of all function counts).
    """
    result = {}
    sections = PerfReport(report_file).sections
    for section_name, section in sections.items():
        per_section = result.setdefault(section_name, {})
        # The bookkeeping entries are created before any function entry.
        per_section[ROWS_TO_SHOW] = 0
        per_section[TOTAL_EVENTS] = 0
        for entry in section.functions:
            per_section[str(entry.name)] = entry.count
            per_section[TOTAL_EVENTS] += entry.count
            if entry.percent > 1:
                per_section[ROWS_TO_SHOW] += 1
    return result
43
44
def _SortDictionaryByValue(d):
    """Return the keys of d ordered by descending value.

    Values accepted by misc.IsFloat are compared as floats; every other value
    is compared as-is.

    Args:
        d: Dictionary whose keys should be ordered.

    Returns:
        A list of the keys of d, largest value first.
    """

    def _SortKey(item):
        value = item[1]
        return float(value) if misc.IsFloat(value) else value

    # Sort ascending and then reverse (rather than sorting with reverse=True)
    # so that entries with equal values come out in reversed insertion order.
    ascending = sorted(d.items(), key=_SortKey)
    return [key for key, _ in reversed(ascending)]
57
58
class Tabulator(object):
    """Print groups of dictionaries as simple text tables."""

    def __init__(self, all_dicts):
        # Iterable of groups; every group is a list of dicts and becomes one
        # printed table.
        self._all_dicts = all_dicts

    def PrintTable(self):
        """Print one table for every group of dictionaries."""
        for group in self._all_dicts:
            self.PrintTableHelper(group)

    def PrintTableHelper(self, dicts):
        """Transform dicts into a table and print it.

        The table has one row per key found in any of the dicts and one
        column per dict. Rows are ordered by the largest value seen for the
        key; a dict lacking the key contributes the string "0".

        Args:
            dicts: List of dictionaries to show side by side.
        """
        # Largest value seen for each key; this drives the row order.
        largest = {}
        for d in dicts:
            for name, value in d.items():
                if name in largest:
                    largest[name] = max(largest[name], value)
                else:
                    largest[name] = value

        # Header row: "name" followed by the positional index of each dict.
        table = [["name"] + list(range(len(dicts)))]
        for name in _SortDictionaryByValue(largest):
            table.append([name] + [d.get(name, "0") for d in dicts])

        print(tabulator.GetSimpleTable(table))
97
98
class Function:
    """Plain record describing one function row of a report section."""

    def __init__(self):
        # Event count for the function; starts at zero.
        self.count = 0
        # Name of the function; starts empty.
        self.name = ""
        # Percentage for the function; starts at zero.
        self.percent = 0
106
107
class Section(object):
    """One "Events:" section of a perf report.

    Attributes:
        name: Text following the event count on the "Events:" line, or ""
            when the contents hold no such line.
        raw_contents: The unparsed text of the section.
        count: The event count from the "Events:" line converted with
            misc.UnitToNumber, or 0 when there is no such line.
        functions: List of Function objects, one per data row of the section
            (empty when there is no "Events:" line).
    """

    def __init__(self, contents):
        self.name = ""
        self.raw_contents = contents
        # Give count/functions defaults up front so that a section without a
        # parsable "Events:" line still exposes every attribute instead of
        # raising AttributeError when they are read later.
        self.count = 0
        self.functions = []
        self._ParseSection()

    def _ParseSection(self):
        """Fill in name, count and functions from raw_contents.

        Raises:
            AssertionError: If the contents hold more than one "Events:" line.
        """
        matches = re.findall(r"Events: (\w+)\s+(.*)", self.raw_contents)
        assert len(matches) <= 1, "More than one event found in 1 section"
        if not matches:
            return
        count_text, self.name = matches[0]
        self.count = misc.UnitToNumber(count_text)

        self.functions = []
        for line in self.raw_contents.splitlines():
            # Data rows are the lines that carry a percentage and are not
            # "#" comment lines; blank lines contain no "%" and are skipped
            # by the same test.
            if "%" not in line or line.startswith("#"):
                continue
            fields = [f for f in line.split(" ") if f]
            function = Function()
            function.percent = float(fields[0].strip("%"))
            function.count = int(fields[1])
            function.name = " ".join(fields[2:])
            self.functions.append(function)
138
139
class PerfReport(object):
    """Get report from raw report.

    Reads a perf report text file, splits it into a header and one chunk per
    "# Events:" marker, and parses both.

    Attributes:
        perf_file: Path of the report file that was read.
        sections: Dict mapping section name to its Section object.
        metadata: Dict of "key: value" pairs found in the header.
        event_counts: Empty dict created by _ParseSections; nothing in this
            class fills it.
    """

    def __init__(self, perf_file):
        self.perf_file = perf_file
        self._ReadFile()
        self.sections = {}
        self.metadata = {}
        self._section_contents = []
        self._section_header = ""
        # Order matters: the header must be split off before sections are
        # parsed, because raw section names are resolved against it.
        self._SplitSections()
        self._ParseSections()
        self._ParseSectionHeader()

    def _ParseSectionHeader(self):
        """Parse a header of a perf report file."""
        # The "captured on" field is inaccurate - this actually refers to when the
        # report was generated, not when the data was captured.
        for line in self._section_header.splitlines():
            # Drop the first two characters of the line (the "# " prefix of
            # header lines).
            line = line[2:]
            if ":" in line:
                key, val = line.strip().split(":", 1)
                self.metadata[key.strip()] = val.strip()

    def _ReadFile(self):
        """Load the whole report file into memory."""
        # Use a context manager so the file handle is closed deterministically.
        with open(self.perf_file) as report:
            self._perf_contents = report.read()

    def _ParseSections(self):
        """Build a Section for every chunk found by _SplitSections."""
        self.event_counts = {}
        self.sections = {}
        for section_content in self._section_contents:
            section = Section(section_content)
            section.name = self._GetHumanReadableName(section.name)
            self.sections[section.name] = section

    # TODO(asharif): Do this better.
    def _GetHumanReadableName(self, section_name):
        """Map a "raw" section name to the name listed in the header.

        Args:
            section_name: Section name as parsed from the "Events:" line.

        Returns:
            section_name itself when it does not contain "raw". Otherwise the
            sixth space-separated token of the first header line that
            contains the raw number and has at least six tokens. When no
            such header line exists, section_name is returned unchanged
            rather than None.
        """
        if "raw" not in section_name:
            return section_name
        raw_number = section_name.strip().split(" ")[-1]
        for line in self._section_header.splitlines():
            if raw_number in line:
                tokens = line.strip().split(" ")
                # Skip header lines too short to carry a name token.
                if len(tokens) > 5:
                    return tokens[5]
        return section_name

    def _SplitSections(self):
        """Split the file contents into the header and per-event chunks."""
        boundaries = [
            m.start() for m in re.finditer("# Events:", self._perf_contents)
        ]
        # The end of the file closes the last section; it also guarantees
        # that boundaries is never empty.
        boundaries.append(len(self._perf_contents))
        self._section_contents = [
            self._perf_contents[start:end]
            for start, end in zip(boundaries, boundaries[1:])
        ]
        # Everything before the first marker (the whole file when there is
        # no marker at all) is the header.
        self._section_header = self._perf_contents[: boundaries[0]]
199
200
class PerfDiffer(object):
    """Perf differ class.

    Compares the sections and top functions of several parsed reports and
    prints the comparison as tables.
    """

    def __init__(self, reports, num_symbols, common_only):
        """Store the comparison settings.

        Args:
            reports: List of report objects exposing perf_file and sections.
            num_symbols: How many leading functions of each section to take
                from every report.
            common_only: When true, also emit a scaled count for functions
                present in every report that has the section.
        """
        self._reports = reports
        self._num_symbols = num_symbols
        self._common_only = common_only
        # Section name -> set of function names shared by all reports that
        # contain the section; filled by _FindCommonFunctions.
        self._common_function_names = {}

    def DoDiff(self):
        """The function that does the diff.

        Builds a table of file names, a table of per-section counts and one
        table of top-function counts per section, then prints them all
        through Tabulator.
        """
        section_names = self._FindAllSections()

        filename_dicts = [
            {"file": report.perf_file} for report in self._reports
        ]
        summary_dicts = [
            {
                name: report.sections[name].count
                for name in section_names
                if name in report.sections
            }
            for report in self._reports
        ]
        all_dicts = [filename_dicts, summary_dicts]

        for section_name in section_names:
            # A set keeps the per-function membership test below cheap.
            top_names = set(
                self._GetTopFunctions(section_name, self._num_symbols)
            )
            self._FindCommonFunctions(section_name)
            dicts = []
            for report in self._reports:
                d = {}
                if section_name in report.sections:
                    section = report.sections[section_name]

                    # Get a common scaling factor for this report.
                    common_scaling_factor = self._GetCommonScalingFactor(
                        section
                    )
                    common_names = self._common_function_names[section.name]

                    for function in section.functions:
                        if function.name not in top_names:
                            continue
                        key = "%s %s" % (section.name, function.name)
                        d[key] = function.count
                        # In common_only mode, also report the count scaled
                        # by the common factor.
                        if self._common_only and function.name in common_names:
                            d[key + " scaled"] = (
                                common_scaling_factor * function.count
                            )
                dicts.append(d)

            all_dicts.append(dicts)

        Tabulator(all_dicts).PrintTable()

    def _FindAllSections(self):
        """Return all section names, ordered by their largest count."""
        sections = {}
        for report in self._reports:
            for section in report.sections.values():
                if section.name in sections:
                    sections[section.name] = max(
                        sections[section.name], section.count
                    )
                else:
                    sections[section.name] = section.count
        return _SortDictionaryByValue(sections)

    def _GetCommonScalingFactor(self, section):
        """Return 100 divided by the total count of the common functions.

        Args:
            section: Section whose functions are summed; its name must
                already have been processed by _FindCommonFunctions.

        Returns:
            The scaling factor as a float, or 0.0 when the common functions
            sum to zero (for instance when the reports share no function),
            which would otherwise be a division by zero.
        """
        common_names = self._common_function_names[section.name]
        common_count = self._GetCount(section, lambda x: x in common_names)
        if not common_count:
            return 0.0
        return 100.0 / common_count

    def _GetCount(self, section, filter_fun=None):
        """Sum the counts of the functions in section.

        Args:
            section: Section whose functions are summed.
            filter_fun: Optional predicate on the function name; when given,
                only functions for which it is true are counted.

        Returns:
            The summed count as an int.
        """
        return sum(
            int(function.count)
            for function in section.functions
            if not filter_fun or filter_fun(function.name)
        )

    def _FindCommonFunctions(self, section_name):
        """Record the function names shared by all reports with the section.

        The result is stored in self._common_function_names[section_name];
        it is an empty set when no report contains the section.
        """
        name_sets = [
            {f.name for f in report.sections[section_name].functions}
            for report in self._reports
            if section_name in report.sections
        ]
        self._common_function_names[section_name] = (
            set.intersection(*name_sets) if name_sets else set()
        )

    def _GetTopFunctions(self, section_name, num_functions):
        """Return the names of the leading functions of a section.

        Takes the first num_functions functions of the section from every
        report and returns the union of their names, ordered by the largest
        count seen for each name.
        """
        all_functions = {}
        for report in self._reports:
            if section_name not in report.sections:
                continue
            section = report.sections[section_name]
            for f in section.functions[:num_functions]:
                if f.name in all_functions:
                    all_functions[f.name] = max(all_functions[f.name], f.count)
                else:
                    all_functions[f.name] = f.count
        # FIXME(asharif): Don't really need to sort these...
        return _SortDictionaryByValue(all_functions)

    def _GetFunctionsDict(self, section, function_names):
        """Return {name: count} for the section's functions in function_names."""
        return {
            function.name: function.count
            for function in section.functions
            if function.name in function_names
        }
320
321
def Main(argv):
    """The entry of the main.

    Args:
        argv: Full argument vector. After option parsing, the first leftover
            argument is skipped (the script path when called with sys.argv)
            and every following one is treated as a perf report file.

    Returns:
        0 once the diff has been printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-n",
        "--num_symbols",
        dest="num_symbols",
        default="5",
        help="The number of symbols to show.",
    )
    parser.add_argument(
        "-c",
        "--common_only",
        dest="common_only",
        action="store_true",
        default=False,
        help="Diff common symbols only.",
    )

    options, leftovers = parser.parse_known_args(argv)

    parsed_reports = [PerfReport(path) for path in leftovers[1:]]
    differ = PerfDiffer(
        parsed_reports, int(options.num_symbols), options.common_only
    )
    differ.DoDiff()

    return 0
354
355
if __name__ == "__main__":
    # Run the tool and hand Main's return value to the shell as exit status.
    exit_status = Main(sys.argv)
    sys.exit(exit_status)
358