xref: /aosp_15_r20/system/extras/simpleperf/scripts/pprof_proto_generator.py (revision 288bf5226967eb3dac5cce6c939ccc2a7f2b4fe5)
1#!/usr/bin/env python3
2#
3# Copyright (C) 2017 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""pprof_proto_generator.py: read perf.data, generate pprof.profile, which can be
19    used by pprof.
20
21  Example:
22    ./app_profiler.py
23    ./pprof_proto_generator.py
24    pprof -text pprof.profile
25"""
26
27import logging
28import os
29import os.path
30import re
31import sys
32from typing import Dict
33
34from simpleperf_report_lib import GetReportLib, MappingStruct
35from simpleperf_utils import (Addr2Nearestline, BaseArgumentParser, BinaryFinder, extant_dir,
36                              flatten_arg_list, log_exit, ReadElf, ToolFinder)
37try:
38    import profile_pb2
39except ImportError as e:
40    log_exit(f'{e}\nprotobuf package is missing or too old. Please install it like ' +
41             '`pip install protobuf==4.21`.')
42
43
# Units reported to pprof for some common event names. get_sample_type_id() falls
# back to 'count' for any event name that is not listed here.
EVENT_UNITS = {
    'cpu-clock': 'nanoseconds',
    'cpu-cycles': 'cpu-cycles',
    'instructions': 'instructions',
    'task-clock': 'nanoseconds',
}
51
52
def load_pprof_profile(filename):
    """Read a serialized pprof profile from a file.

    Args:
        filename: path of a file containing a serialized profile_pb2.Profile message.

    Returns:
        A profile_pb2.Profile message parsed from the file content.
    """
    with open(filename, 'rb') as profile_file:
        serialized = profile_file.read()
    loaded = profile_pb2.Profile()
    loaded.ParseFromString(serialized)
    return loaded
58
59
def store_pprof_profile(filename, profile):
    """Serialize a profile message and write it to a file.

    Args:
        filename: path of the output file; an existing file is overwritten.
        profile: message object providing SerializeToString().
    """
    with open(filename, 'wb') as output:
        payload = profile.SerializeToString()
        output.write(payload)
63
64
65class PprofProfilePrinter(object):
66
67    def __init__(self, profile):
68        self.profile = profile
69        self.string_table = profile.string_table
70
71    def show(self):
72        p = self.profile
73        sub_space = '  '
74        print('Profile {')
75        print('%d sample_types' % len(p.sample_type))
76        for i in range(len(p.sample_type)):
77            print('sample_type[%d] = ' % i, end='')
78            self.show_value_type(p.sample_type[i])
79        print('%d samples' % len(p.sample))
80        for i in range(len(p.sample)):
81            print('sample[%d]:' % i)
82            self.show_sample(p.sample[i], sub_space)
83        print('%d mappings' % len(p.mapping))
84        for i in range(len(p.mapping)):
85            print('mapping[%d]:' % i)
86            self.show_mapping(p.mapping[i], sub_space)
87        print('%d locations' % len(p.location))
88        for i in range(len(p.location)):
89            print('location[%d]:' % i)
90            self.show_location(p.location[i], sub_space)
91        for i in range(len(p.function)):
92            print('function[%d]:' % i)
93            self.show_function(p.function[i], sub_space)
94        print('%d strings' % len(p.string_table))
95        for i in range(len(p.string_table)):
96            print('string[%d]: %s' % (i, p.string_table[i]))
97        print('drop_frames: %s' % self.string(p.drop_frames))
98        print('keep_frames: %s' % self.string(p.keep_frames))
99        print('time_nanos: %u' % p.time_nanos)
100        print('duration_nanos: %u' % p.duration_nanos)
101        print('period_type: ', end='')
102        self.show_value_type(p.period_type)
103        print('period: %u' % p.period)
104        for i in range(len(p.comment)):
105            print('comment[%d] = %s' % (i, self.string(p.comment[i])))
106        print('default_sample_type: %d' % p.default_sample_type)
107        print('} // Profile')
108        print()
109
110    def show_value_type(self, value_type, space=''):
111        print('%sValueType(typeID=%d, unitID=%d, type=%s, unit=%s)' %
112              (space, value_type.type, value_type.unit,
113               self.string(value_type.type), self.string(value_type.unit)))
114
115    def show_sample(self, sample, space=''):
116        sub_space = space + '  '
117        for i in range(len(sample.location_id)):
118            print('%slocation_id[%d]: id %d' % (space, i, sample.location_id[i]))
119            self.show_location_id(sample.location_id[i], sub_space)
120        for i in range(len(sample.value)):
121            print('%svalue[%d] = %d' % (space, i, sample.value[i]))
122        for i in range(len(sample.label)):
123            print('%slabel[%d] = %s:%s' % (space, i, self.string(sample.label[i].key),
124                                           self.string(sample.label[i].str)))
125
126    def show_location_id(self, location_id, space=''):
127        location = self.profile.location[location_id - 1]
128        self.show_location(location, space)
129
130    def show_location(self, location, space=''):
131        sub_space = space + '  '
132        print('%sid: %d' % (space, location.id))
133        print('%smapping_id: %d' % (space, location.mapping_id))
134        self.show_mapping_id(location.mapping_id, sub_space)
135        print('%saddress: %x' % (space, location.address))
136        for i in range(len(location.line)):
137            print('%sline[%d]:' % (space, i))
138            self.show_line(location.line[i], sub_space)
139
140    def show_mapping_id(self, mapping_id, space=''):
141        mapping = self.profile.mapping[mapping_id - 1]
142        self.show_mapping(mapping, space)
143
144    def show_mapping(self, mapping, space=''):
145        print('%sid: %d' % (space, mapping.id))
146        print('%smemory_start: %x' % (space, mapping.memory_start))
147        print('%smemory_limit: %x' % (space, mapping.memory_limit))
148        print('%sfile_offset: %x' % (space, mapping.file_offset))
149        print('%sfilename: %s(%d)' % (space, self.string(mapping.filename),
150                                      mapping.filename))
151        print('%sbuild_id: %s(%d)' % (space, self.string(mapping.build_id),
152                                      mapping.build_id))
153        print('%shas_functions: %s' % (space, mapping.has_functions))
154        print('%shas_filenames: %s' % (space, mapping.has_filenames))
155        print('%shas_line_numbers: %s' % (space, mapping.has_line_numbers))
156        print('%shas_inline_frames: %s' % (space, mapping.has_inline_frames))
157
158    def show_line(self, line, space=''):
159        sub_space = space + '  '
160        print('%sfunction_id: %d' % (space, line.function_id))
161        self.show_function_id(line.function_id, sub_space)
162        print('%sline: %d' % (space, line.line))
163
164    def show_function_id(self, function_id, space=''):
165        function = self.profile.function[function_id - 1]
166        self.show_function(function, space)
167
168    def show_function(self, function, space=''):
169        print('%sid: %d' % (space, function.id))
170        print('%sname: %s' % (space, self.string(function.name)))
171        print('%ssystem_name: %s' % (space, self.string(function.system_name)))
172        print('%sfilename: %s' % (space, self.string(function.filename)))
173        print('%sstart_line: %d' % (space, function.start_line))
174
175    def string(self, string_id):
176        return self.string_table[string_id]
177
178
class Label(object):
    """A key/value label of a sample, holding the ids used for both parts."""

    def __init__(self, key_id: int, str_id: int):
        # key_id: see profile.Label.key
        # str_id: see profile.Label.str
        self.key_id, self.str_id = key_id, str_id
185
186
class Sample(object):
    """One aggregated sample: a call stack, its labels and its values.

    Attributes:
        location_ids: list of location ids, in the order they were added.
        values: dict mapping a sample type id to the accumulated value.
        labels: list of objects with `key_id` and `str_id` attributes (see Label).
    """

    def __init__(self):
        self.location_ids = []
        self.values = {}
        self.labels = []

    def add_location_id(self, location_id):
        """Append one location id to the call stack."""
        self.location_ids.append(location_id)

    def add_value(self, sample_type_id, value):
        """Add `value` to the value accumulated for `sample_type_id`."""
        self.values[sample_type_id] = self.values.get(sample_type_id, 0) + value

    def add_values(self, values):
        """Add every (sample_type_id, value) pair of the dict `values`."""
        for sample_type_id, value in values.items():
            self.add_value(sample_type_id, value)

    @property
    def key(self):
        """Hashable aggregation key: the call stack plus the labels.

        Labels are part of the key so that samples with the same call stack but
        different labels are kept apart. If they were merged, the merged sample
        would keep only the labels of whichever sample was seen first.
        """
        label_key = tuple((label.key_id, label.str_id) for label in self.labels)
        return (tuple(self.location_ids), label_key)
207
208
class Location(object):
    """An address inside a mapping, plus the line entries attached to it."""

    def __init__(self, mapping_id, address, vaddr_in_dso):
        self.lines = []
        self.vaddr_in_dso = vaddr_in_dso
        self.address = address
        self.mapping_id = mapping_id
        # -1 marks a location that has not been given an id yet.
        self.id = -1

    @property
    def key(self):
        """Tuple (mapping_id, address) used to detect duplicate locations."""
        return self.mapping_id, self.address
221
222
class Line(object):
    """A function id and a line number; both start as 0."""

    def __init__(self):
        self.function_id = self.line = 0
228
229
class Mapping(object):
    """A mapped region of a file: address range, file offset, name and build id."""

    def __init__(self, start, end, pgoff, filename_id, build_id_id):
        # -1 marks a mapping that has not been given an id yet.
        self.id = -1
        self.memory_start, self.memory_limit = start, end
        self.file_offset = pgoff
        self.filename_id, self.build_id_id = filename_id, build_id_id

    @property
    def key(self):
        """Tuple of every field except the id, used to detect duplicate mappings."""
        address_range = (self.memory_start, self.memory_limit)
        return address_range + (self.file_offset, self.filename_id, self.build_id_id)
248
249
class Function(object):
    """A function named by string ids, with optional source file and start line."""

    def __init__(self, name_id, dso_name_id, vaddr_in_dso):
        self.name_id, self.dso_name_id = name_id, dso_name_id
        self.vaddr_in_dso = vaddr_in_dso
        # Source information stays 0 until it is filled in later.
        self.source_filename_id = self.start_line = 0
        # -1 marks a function that has not been given an id yet.
        self.id = -1

    @property
    def key(self):
        """Tuple (name_id, dso_name_id) used to detect duplicate functions."""
        return self.name_id, self.dso_name_id
263
264
265# pylint: disable=no-member
class PprofProfileGenerator(object):
    """Convert simpleperf record files into a profile_pb2.Profile message.

    Usage: construct with a config dict, call load_record_file() once per record
    file to aggregate its samples, then call gen() to build the profile.

    Config keys: 'ndk_path', 'max_chain_length', 'show_event_counters' and
    'report_lib_options' are required; 'dso_filters', 'tagroot' and
    'show_art_frames' are optional. 'binary_cache_dir' is set by __init__.
    """

    def __init__(self, config):
        """Create empty aggregation tables and the binary lookup helpers.

        Args:
            config: dict of options, see the class docstring. It is kept by reference
                and its 'binary_cache_dir' entry is overwritten here.
        """
        self.config = config
        self.lib = None

        # Use the 'binary_cache' directory if it exists, otherwise no binary cache.
        config['binary_cache_dir'] = 'binary_cache'
        if not os.path.isdir(config['binary_cache_dir']):
            config['binary_cache_dir'] = None
        self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None
        self.max_chain_length = config['max_chain_length']
        self.tagroot = config.get('tagroot', [])
        self.show_event_counters = config['show_event_counters']
        self.profile = profile_pb2.Profile()
        # Index 0 of the string table is the empty string; get_string_id() relies on it.
        self.profile.string_table.append('')
        self.string_table = {}
        self.sample_types = {}
        self.sample_map = {}
        self.sample_list = []
        self.location_map = {}
        self.location_list = []
        self.mapping_map = {}
        self.mapping_list = []
        self.function_map = {}
        self.function_list = []
        self.pseudo_symbol_location_ids: Dict[str, int] = {}
        # Created on first use by get_location_id_for_pseudo_symbol().
        self.pseudo_symbol_mapping = None

        # Map from dso_name in perf.data to (binary path, build_id).
        self.binary_map = {}
        self.read_elf = ReadElf(self.config['ndk_path'])
        self.binary_finder = BinaryFinder(config['binary_cache_dir'], self.read_elf)

    def load_record_file(self, record_file):
        """Read all samples of one record file and aggregate them.

        The report lib opened for the file is closed and self.lib is reset to None
        when this method returns, including when an error is raised while reading.

        Args:
            record_file: path passed to GetReportLib().
        """
        self.lib = GetReportLib(record_file)
        try:
            self._configure_report_lib()
            self._add_record_info()
            self._aggregate_samples()
        finally:
            # Always release the report lib, even if reading the file failed.
            self.lib.Close()
            self.lib = None

    def _configure_report_lib(self):
        """Apply binary cache, ART frame and report options to self.lib."""
        binary_cache_dir = self.config['binary_cache_dir']
        if binary_cache_dir:
            self.lib.SetSymfs(binary_cache_dir)
            kallsyms = os.path.join(binary_cache_dir, 'kallsyms')
            if os.path.isfile(kallsyms):
                self.lib.SetKallsymsFile(kallsyms)

        if self.config.get('show_art_frames'):
            self.lib.ShowArtFrames()
        self.lib.SetReportOptions(self.config['report_lib_options'])

    def _add_record_info(self):
        """Add profile comments and the timestamp taken from the record file."""
        comments = [
            "Simpleperf Record Command:\n" + self.lib.GetRecordCmd(),
            "Converted to pprof with:\n" + " ".join(sys.argv),
            "Architecture:\n" + self.lib.GetArch(),
        ]
        meta_info = self.lib.MetaInfo()
        if "app_versioncode" in meta_info:
            comments.append("App Version Code:\n" + meta_info["app_versioncode"])
        for comment in comments:
            self.profile.comment.append(self.get_string_id(comment))
        if "timestamp" in meta_info:
            # The value is multiplied by 1e9 to fill time_nanos.
            self.profile.time_nanos = int(meta_info["timestamp"]) * 1000 * 1000 * 1000

    def _aggregate_samples(self):
        """Turn every sample returned by self.lib into a Sample and aggregate it."""
        numbers_re = re.compile(r"\d+")
        last_counts: dict[int, int] = {}
        while True:
            report_sample = self.lib.GetNextSample()
            if report_sample is None:
                break
            event = self.lib.GetEventOfCurrentSample()
            symbol = self.lib.GetSymbolOfCurrentSample()
            callchain = self.lib.GetCallChainOfCurrentSample()

            sample = Sample()
            # Each event name owns two consecutive value slots: sample count, period.
            sample_type_id = self.get_sample_type_id(event.name)
            sample.add_value(sample_type_id, 1)
            sample.add_value(sample_type_id + 1, report_sample.period)
            self.add_event_counters(sample, last_counts)

            thread_comm = report_sample.thread_comm
            label_pairs = (
                ("thread", thread_comm),
                # Heuristic: threadpools doing similar work are often named as
                # name-1, name-2, name-3. Combine threadpools into one label
                # "name-%d" if they only differ by a number.
                ("threadpool", numbers_re.sub("%d", thread_comm)),
                ("pid", str(report_sample.pid)),
                ("tid", str(report_sample.tid)),
            )
            for label_key, label_value in label_pairs:
                sample.labels.append(Label(
                    self.get_string_id(label_key),
                    self.get_string_id(label_value)))

            # The dso filter is decided once per sample, from the sampled symbol: either
            # the sampled frame and its call chain are all added, or none of them is.
            # NOTE(review): call chain entries are not checked against the filter one by
            # one (entry.symbol is never tested) - confirm that this is intended.
            if self._filter_symbol(symbol):
                sample.add_location_id(self.get_location_id(report_sample.ip, symbol))
                # Only the last max_chain_length call chain entries are used.
                first = max(0, callchain.nr - self.max_chain_length)
                for i in range(first, callchain.nr):
                    entry = callchain.entries[i]
                    sample.add_location_id(self.get_location_id(entry.ip, entry.symbol))
            self.add_tagroot(sample)
            if sample.location_ids:
                self.add_sample(sample)

    def gen(self, jobs: int):
        """Build and return the profile message from the aggregated samples.

        Args:
            jobs: passed to gen_source_lines() for source line generation.
        """
        # 1. Generate line info for locations and functions.
        self.gen_source_lines(jobs)

        # 2. Produce samples/locations/functions in profile.
        for sample in self.sample_list:
            self.gen_profile_sample(sample)
        for mapping in self.mapping_list:
            self.gen_profile_mapping(mapping)
        for location in self.location_list:
            self.gen_profile_location(location)
        for function in self.function_list:
            self.gen_profile_function(function)

        return self.profile

    def add_tagroot(self, sample: 'Sample'):
        """Append the pseudo frames requested by the 'tagroot' config to a sample.

        Names are handled in reverse order of self.tagroot. Requires self.lib to be
        positioned on the sample being converted.
        """
        if not self.tagroot:
            return
        for name in reversed(self.tagroot):
            if name == 'comm':
                process_name = self.lib.GetProcessNameOfCurrentSample()
                location_id = self.get_location_id_for_pseudo_symbol(f'process:{process_name}')
                sample.add_location_id(location_id)
            elif name == 'thread_comm':
                thread_name = self.lib.GetCurrentSample().thread_comm
                location_id = self.get_location_id_for_pseudo_symbol(f'thread:{thread_name}')
                sample.add_location_id(location_id)

    def add_event_counters(self, sample: 'Sample', last_counts: dict[int, int]):
        """Add event counter values of the current sample, if enabled.

        Args:
            sample: the sample receiving the values.
            last_counts: dict from event id to the count seen in the previous call;
                it is updated in place so that only the difference is added.
        """
        if not self.show_event_counters:
            return

        event_counters = self.lib.GetEventCountersOfCurrentSample()
        for i in range(event_counters.nr):
            event_counter = event_counters.event_counter[i]
            sample_type_id = self.get_sample_type_id(event_counter.name, add_suffix='_counter')
            sample.add_value(sample_type_id, 1)

            event_id = event_counter.id
            event_acc_count = event_counter.count
            last_count = last_counts.get(event_id, 0)
            sample.add_value(sample_type_id + 1, event_acc_count - last_count)
            last_counts[event_id] = event_acc_count

    def _filter_symbol(self, symbol):
        """Return True if there is no dso filter or symbol.dso_name is in it."""
        return not self.dso_filter or symbol.dso_name in self.dso_filter

    def get_string_id(self, str_value):
        """Return the string table id of str_value, adding it if needed.

        Empty or None values map to id 0.
        """
        if not str_value:
            return 0
        str_id = self.string_table.get(str_value)
        if str_id is not None:
            return str_id
        # +1 because index 0 of profile.string_table holds the empty string, which is
        # not stored in self.string_table.
        str_id = len(self.string_table) + 1
        self.string_table[str_value] = str_id
        self.profile.string_table.append(str_value)
        return str_id

    def get_string(self, str_id):
        """Return the string stored at index str_id of the profile string table."""
        return self.profile.string_table[str_id]

    def get_sample_type_id(self, name, *, add_suffix: str = None):
        """Return the id of the first of the two sample types registered for a name.

        Two consecutive sample types are registered per name: '<name>_samples' with
        unit 'samples' at the returned id, and '<name>' with a unit taken from
        EVENT_UNITS (default 'count') at the returned id + 1.

        Args:
            name: event name.
            add_suffix: optional text appended to name before the lookup.
        """
        if add_suffix is not None:
            name += add_suffix
        sample_type_id = self.sample_types.get(name)
        if sample_type_id is not None:
            return sample_type_id
        sample_type_id = len(self.profile.sample_type)
        sample_type = self.profile.sample_type.add()
        sample_type.type = self.get_string_id(name + '_samples')
        sample_type.unit = self.get_string_id('samples')
        sample_type = self.profile.sample_type.add()
        sample_type.type = self.get_string_id(name)
        units = EVENT_UNITS.get(name, 'count')
        sample_type.unit = self.get_string_id(units)
        self.sample_types[name] = sample_type_id
        return sample_type_id

    def get_location_id(self, ip, symbol):
        """Return the id of the location for (ip, symbol), creating it if needed.

        The mapping and the function of the symbol are registered as a side effect,
        even when the location already exists.
        """
        binary_path, build_id = self.get_binary(symbol.dso_name)
        mapping_id = self.get_mapping_id(symbol.mapping[0], binary_path, build_id)
        location = Location(mapping_id, ip, symbol.vaddr_in_file)
        function_id = self.get_function_id(symbol.symbol_name, binary_path, symbol.symbol_addr)
        if function_id:
            # Add Line only when it has a valid function id, see http://b/36988814.
            # Default line info only contains the function name
            line = Line()
            line.function_id = function_id
            location.lines.append(line)

        exist_location = self.location_map.get(location.key)
        if exist_location:
            return exist_location.id
        # location_id starts from 1
        location.id = len(self.location_list) + 1
        self.location_list.append(location)
        self.location_map[location.key] = location
        return location.id

    def get_location_id_for_pseudo_symbol(self, symbol_name: str) -> int:
        """Return the id of the location standing for a pseudo symbol.

        All pseudo symbols live in one mapping named 'pseudo_mapping', which is
        created on first use; each new symbol gets the next address in it.
        """
        if self.pseudo_symbol_mapping is None:
            self.pseudo_symbol_mapping = Mapping(0, 0, 0, self.get_string_id('pseudo_mapping'), 0)
            self.pseudo_symbol_mapping.id = len(self.mapping_list) + 1
            self.mapping_list.append(self.pseudo_symbol_mapping)
        # Location ids start from 1, so a stored id is never falsy.
        if location_id := self.pseudo_symbol_location_ids.get(symbol_name):
            return location_id
        ip = len(self.pseudo_symbol_location_ids)
        # Grow the mapping so that it covers the new address.
        self.pseudo_symbol_mapping.memory_limit = ip + 1
        function_id = self.get_function_id(symbol_name, 'pseudo_mapping', ip)
        location = Location(self.pseudo_symbol_mapping.id, ip, ip)
        if function_id:
            line = Line()
            line.function_id = function_id
            location.lines.append(line)
        location.id = len(self.location_list) + 1
        self.location_list.append(location)
        self.location_map[location.key] = location
        self.pseudo_symbol_location_ids[symbol_name] = location.id
        return location.id

    def get_mapping_id(self, report_mapping, filename, build_id):
        """Return the id of the mapping with these properties, creating it if needed.

        Args:
            report_mapping: object with start, end and pgoff attributes.
            filename: path of the mapped binary.
            build_id: build id of the mapped binary.
        """
        filename_id = self.get_string_id(filename)
        build_id_id = self.get_string_id(build_id)
        mapping = Mapping(report_mapping.start, report_mapping.end,
                          report_mapping.pgoff, filename_id, build_id_id)
        exist_mapping = self.mapping_map.get(mapping.key)
        if exist_mapping:
            return exist_mapping.id
        # mapping_id starts from 1
        mapping.id = len(self.mapping_list) + 1
        self.mapping_list.append(mapping)
        self.mapping_map[mapping.key] = mapping
        return mapping.id

    def get_binary(self, dso_name):
        """ Return (binary_path, build_id) for a given dso_name. """
        value = self.binary_map.get(dso_name)
        if value:
            return value

        binary_path = dso_name
        build_id = self.lib.GetBuildIdForPath(dso_name)
        # Try elf_path in binary cache.
        elf_path = self.binary_finder.find_binary(dso_name, build_id)
        if elf_path:
            binary_path = str(elf_path)

        # The build ids in perf.data are padded to 20 bytes, but pprof needs without padding.
        build_id = ReadElf.unpad_build_id(build_id)
        self.binary_map[dso_name] = (binary_path, build_id)
        return (binary_path, build_id)

    def get_mapping(self, mapping_id):
        """Return the mapping with the given id, or None for ids smaller than 1."""
        return self.mapping_list[mapping_id - 1] if mapping_id > 0 else None

    def get_function_id(self, name, dso_name, vaddr_in_file):
        """Return the id of the function (name, dso_name), creating it if needed.

        Returns 0 for the name 'unknown', which is never registered.
        """
        if name == 'unknown':
            return 0
        function = Function(self.get_string_id(name), self.get_string_id(dso_name), vaddr_in_file)
        exist_function = self.function_map.get(function.key)
        if exist_function:
            return exist_function.id
        # function_id starts from 1
        function.id = len(self.function_list) + 1
        self.function_list.append(function)
        self.function_map[function.key] = function
        return function.id

    def get_function(self, function_id):
        """Return the function with the given id, or None for ids smaller than 1."""
        return self.function_list[function_id - 1] if function_id > 0 else None

    def add_sample(self, sample):
        """Merge a sample into the existing sample with the same key, or store it."""
        exist_sample = self.sample_map.get(sample.key)
        if exist_sample:
            exist_sample.add_values(sample.values)
        else:
            self.sample_list.append(sample)
            self.sample_map[sample.key] = sample

    def gen_source_lines(self, jobs: int):
        """Add source file and line info to locations and functions.

        Does nothing except logging when there is no binary cache or when
        llvm-symbolizer can't be found.

        Args:
            jobs: passed to Addr2Nearestline.convert_addrs_to_lines().
        """
        # 1. Create Addr2line instance
        if not self.config.get('binary_cache_dir'):
            logging.info("Can't generate line information because binary_cache is missing.")
            return
        if not ToolFinder.find_tool_path('llvm-symbolizer', self.config['ndk_path']):
            logging.info("Can't generate line information because can't find llvm-symbolizer.")
            return
        # We have changed dso names to paths in binary_cache in self.get_binary(). So no need to
        # pass binary_cache_dir to BinaryFinder.
        binary_finder = BinaryFinder(None, self.read_elf)
        addr2line = Addr2Nearestline(self.config['ndk_path'], binary_finder, True)

        # 2. Put all needed addresses to it.
        for location in self.location_list:
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            if location.lines:
                function = self.get_function(location.lines[0].function_id)
                addr2line.add_addr(dso_name, None, function.vaddr_in_dso, location.vaddr_in_dso)
        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            addr2line.add_addr(dso_name, None, function.vaddr_in_dso, function.vaddr_in_dso)

        # 3. Generate source lines.
        addr2line.convert_addrs_to_lines(jobs)

        # 4. Annotate locations and functions.
        for location in self.location_list:
            if not location.lines:
                continue
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            dso = addr2line.get_dso(dso_name)
            if not dso:
                continue
            sources = addr2line.get_addr_source(dso, location.vaddr_in_dso)
            if not sources:
                continue
            for i, source in enumerate(sources):
                source_file, source_line, function_name = source
                if i == 0:
                    # Don't override original function name from report library, which is more
                    # accurate when proguard mapping file is given.
                    function_id = location.lines[0].function_id
                    # Clear default line info.
                    location.lines.clear()
                else:
                    function_id = self.get_function_id(function_name, dso_name, 0)
                if function_id == 0:
                    continue
                location.lines.append(self.add_line(source_file, source_line, function_id))

        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            # Functions without an address (vaddr_in_dso == 0) are left unchanged.
            if function.vaddr_in_dso:
                dso = addr2line.get_dso(dso_name)
                if not dso:
                    continue
                sources = addr2line.get_addr_source(dso, function.vaddr_in_dso)
                if sources:
                    source_file, source_line, _ = sources[0]
                    function.source_filename_id = self.get_string_id(source_file)
                    function.start_line = source_line

    def add_line(self, source_file, source_line, function_id):
        """Return a new Line for function_id and record source_file on the function."""
        line = Line()
        function = self.get_function(function_id)
        function.source_filename_id = self.get_string_id(source_file)
        line.function_id = function_id
        line.line = source_line
        return line

    def gen_profile_sample(self, sample):
        """Append one aggregated sample to the profile message."""
        profile_sample = self.profile.sample.add()
        profile_sample.location_id.extend(sample.location_ids)
        # Every name in self.sample_types owns two value slots; unused slots stay 0.
        sample_type_count = len(self.sample_types) * 2
        values = [0] * sample_type_count
        for sample_type_id, value in sample.values.items():
            values[sample_type_id] = value
        profile_sample.value.extend(values)

        for sample_label in sample.labels:
            label = profile_sample.label.add()
            label.key = sample_label.key_id
            label.str = sample_label.str_id

    def gen_profile_mapping(self, mapping):
        """Append one mapping to the profile message."""
        profile_mapping = self.profile.mapping.add()
        profile_mapping.id = mapping.id
        profile_mapping.memory_start = mapping.memory_start
        profile_mapping.memory_limit = mapping.memory_limit
        profile_mapping.file_offset = mapping.file_offset
        profile_mapping.filename = mapping.filename_id
        profile_mapping.build_id = mapping.build_id_id
        profile_mapping.has_filenames = True
        profile_mapping.has_functions = True
        # Line numbers and inline frames are only claimed when a binary cache exists.
        has_binary_cache = bool(self.config.get('binary_cache_dir'))
        profile_mapping.has_line_numbers = has_binary_cache
        profile_mapping.has_inline_frames = has_binary_cache

    def gen_profile_location(self, location):
        """Append one location and its line entries to the profile message."""
        profile_location = self.profile.location.add()
        profile_location.id = location.id
        profile_location.mapping_id = location.mapping_id
        profile_location.address = location.address
        for location_line in location.lines:
            line = profile_location.line.add()
            line.function_id = location_line.function_id
            line.line = location_line.line

    def gen_profile_function(self, function):
        """Append one function to the profile message."""
        profile_function = self.profile.function.add()
        profile_function.id = function.id
        profile_function.name = function.name_id
        # The same string is used for both name and system_name.
        profile_function.system_name = function.name_id
        profile_function.filename = function.source_filename_id
        profile_function.start_line = function.start_line
674
675
def main():
    """Parse the command line, then print an existing profile or generate a new one."""
    parser = BaseArgumentParser(description='Generate pprof profile data in pprof.profile.')
    parser.add_argument(
        '--show', nargs='?', action='append',
        help='print existing pprof.profile.')
    parser.add_argument(
        '-i', '--record_file', nargs='+', default=['perf.data'],
        help="""
        Set profiling data file to report. Default is perf.data""")
    parser.add_argument(
        '-o', '--output_file', default='pprof.profile',
        help="""
        The path of generated pprof profile data.""")
    # The large default stands in for "no limit".
    parser.add_argument(
        '--max_chain_length', type=int, default=1000000000,
        help="""
        Maximum depth of samples to be converted.""")
    parser.add_argument(
        '--ndk_path', type=extant_dir,
        help='Set the path of a ndk release.')
    parser.add_argument(
        '-j', '--jobs', type=int, default=os.cpu_count(),
        help='Use multithreading to speed up source code annotation.')
    parser.add_argument(
        '--tagroot', choices=['comm', 'thread_comm'], nargs='+',
        help="""
        Add pseudo stack frames at the callstack root. All possible frames are:
        comm (process:<process_name>), thread_comm (thread:<thread_name>).
    """)
    parser.add_argument(
        '--show_event_counters', action='store_true',
        help='Show events in counters for profile recorded with --add-counter options.')
    sample_filter_group = parser.add_argument_group('Sample filter options')
    sample_filter_group.add_argument(
        '--dso', nargs='+', action='append',
        help="""
        Use samples only in selected binaries.""")
    parser.add_report_lib_options(sample_filter_group=sample_filter_group)

    args = parser.parse_args()

    # --show mode: print an existing profile and stop.
    if args.show:
        # Fall back to the default file name when the first --show value is empty.
        show_file = args.show[0] or 'pprof.profile'
        PprofProfilePrinter(load_pprof_profile(show_file)).show()
        return

    # Convert mode: aggregate all record files into one profile.
    config = {
        'output_file': args.output_file,
        'dso_filters': flatten_arg_list(args.dso),
        'ndk_path': args.ndk_path,
        'max_chain_length': args.max_chain_length,
        'report_lib_options': args.report_lib_options,
        'tagroot': args.tagroot,
        'show_event_counters': args.show_event_counters,
    }
    generator = PprofProfileGenerator(config)
    for record_file in args.record_file:
        generator.load_record_file(record_file)
    store_pprof_profile(config['output_file'], generator.gen(args.jobs))
    logging.info("Report is generated at '%s' successfully.", config['output_file'])
    logging.info('Before uploading to the continuous PProf UI, use gzip to compress the file.')
725
726
# Run the converter only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
729