xref: /aosp_15_r20/prebuilts/build-tools/common/py3-stdlib/profile.py (revision cda5da8d549138a6648c5ee6d7a49cf8f4a657be)
1#! /usr/bin/env python3
2#
3# Class for profiling python code. rev 1.0  6/2/94
4#
5# Written by James Roskind
6# Based on prior profile module by Sjoerd Mullender...
7#   which was hacked somewhat by: Guido van Rossum
8
9"""Class for profiling Python code."""
10
11# Copyright Disney Enterprises, Inc.  All Rights Reserved.
12# Licensed to PSF under a Contributor Agreement
13#
14# Licensed under the Apache License, Version 2.0 (the "License");
15# you may not use this file except in compliance with the License.
16# You may obtain a copy of the License at
17#
18# http://www.apache.org/licenses/LICENSE-2.0
19#
20# Unless required by applicable law or agreed to in writing, software
21# distributed under the License is distributed on an "AS IS" BASIS,
22# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
23# either express or implied.  See the License for the specific language
24# governing permissions and limitations under the License.
25
26
27import io
28import sys
29import time
30import marshal
31
32__all__ = ["run", "runctx", "Profile"]
33
34# Sample timer for use with
35#i_count = 0
36#def integer_timer():
37#       global i_count
38#       i_count = i_count + 1
39#       return i_count
40#itimes = integer_timer # replace with C coded timer returning integers
41
42class _Utils:
43    """Support class for utility functions which are shared by
44    profile.py and cProfile.py modules.
45    Not supposed to be used directly.
46    """
47
48    def __init__(self, profiler):
49        self.profiler = profiler
50
51    def run(self, statement, filename, sort):
52        prof = self.profiler()
53        try:
54            prof.run(statement)
55        except SystemExit:
56            pass
57        finally:
58            self._show(prof, filename, sort)
59
60    def runctx(self, statement, globals, locals, filename, sort):
61        prof = self.profiler()
62        try:
63            prof.runctx(statement, globals, locals)
64        except SystemExit:
65            pass
66        finally:
67            self._show(prof, filename, sort)
68
69    def _show(self, prof, filename, sort):
70        if filename is not None:
71            prof.dump_stats(filename)
72        else:
73            prof.print_stats(sort)
74
75
76#**************************************************************************
77# The following are the static member functions for the profiler class
78# Note that an instance of Profile() is *not* needed to call them.
79#**************************************************************************
80
81def run(statement, filename=None, sort=-1):
82    """Run statement under profiler optionally saving results in filename
83
84    This function takes a single argument that can be passed to the
85    "exec" statement, and an optional file name.  In all cases this
86    routine attempts to "exec" its first argument and gather profiling
87    statistics from the execution. If no file name is present, then this
88    function automatically prints a simple profiling report, sorted by the
89    standard name string (file/line/function-name) that is presented in
90    each line.
91    """
92    return _Utils(Profile).run(statement, filename, sort)
93
94def runctx(statement, globals, locals, filename=None, sort=-1):
95    """Run statement under profiler, supplying your own globals and locals,
96    optionally saving results in filename.
97
98    statement and filename have the same semantics as profile.run
99    """
100    return _Utils(Profile).runctx(statement, globals, locals, filename, sort)
101
102
103class Profile:
104    """Profiler class.
105
106    self.cur is always a tuple.  Each such tuple corresponds to a stack
107    frame that is currently active (self.cur[-2]).  The following are the
108    definitions of its members.  We use this external "parallel stack" to
109    avoid contaminating the program that we are profiling. (old profiler
110    used to write into the frames local dictionary!!) Derived classes
111    can change the definition of some entries, as long as they leave
112    [-2:] intact (frame and previous tuple).  In case an internal error is
113    detected, the -3 element is used as the function name.
114
115    [ 0] = Time that needs to be charged to the parent frame's function.
116           It is used so that a function call will not have to access the
117           timing data for the parent frame.
118    [ 1] = Total time spent in this frame's function, excluding time in
119           subfunctions (this latter is tallied in cur[2]).
120    [ 2] = Total time spent in subfunctions, excluding time executing the
121           frame's function (this latter is tallied in cur[1]).
122    [-3] = Name of the function that corresponds to this frame.
123    [-2] = Actual frame that we correspond to (used to sync exception handling).
124    [-1] = Our parent 6-tuple (corresponds to frame.f_back).
125
126    Timing data for each function is stored as a 5-tuple in the dictionary
127    self.timings[].  The index is always the name stored in self.cur[-3].
128    The following are the definitions of the members:
129
130    [0] = The number of times this function was called, not counting direct
131          or indirect recursion,
132    [1] = Number of times this function appears on the stack, minus one
133    [2] = Total time spent internal to this function
134    [3] = Cumulative time that this function was present on the stack.  In
135          non-recursive functions, this is the total execution time from start
136          to finish of each invocation of a function, including time spent in
137          all subfunctions.
138    [4] = A dictionary indicating for each function name, the number of times
139          it was called by us.
140    """
141
142    bias = 0  # calibration constant
143
144    def __init__(self, timer=None, bias=None):
145        self.timings = {}
146        self.cur = None
147        self.cmd = ""
148        self.c_func_name = ""
149
150        if bias is None:
151            bias = self.bias
152        self.bias = bias     # Materialize in local dict for lookup speed.
153
154        if not timer:
155            self.timer = self.get_time = time.process_time
156            self.dispatcher = self.trace_dispatch_i
157        else:
158            self.timer = timer
159            t = self.timer() # test out timer function
160            try:
161                length = len(t)
162            except TypeError:
163                self.get_time = timer
164                self.dispatcher = self.trace_dispatch_i
165            else:
166                if length == 2:
167                    self.dispatcher = self.trace_dispatch
168                else:
169                    self.dispatcher = self.trace_dispatch_l
170                # This get_time() implementation needs to be defined
171                # here to capture the passed-in timer in the parameter
172                # list (for performance).  Note that we can't assume
173                # the timer() result contains two values in all
174                # cases.
175                def get_time_timer(timer=timer, sum=sum):
176                    return sum(timer())
177                self.get_time = get_time_timer
178        self.t = self.get_time()
179        self.simulate_call('profiler')
180
181    # Heavily optimized dispatch routine for time.process_time() timer
182
183    def trace_dispatch(self, frame, event, arg):
184        timer = self.timer
185        t = timer()
186        t = t[0] + t[1] - self.t - self.bias
187
188        if event == "c_call":
189            self.c_func_name = arg.__name__
190
191        if self.dispatch[event](self, frame,t):
192            t = timer()
193            self.t = t[0] + t[1]
194        else:
195            r = timer()
196            self.t = r[0] + r[1] - t # put back unrecorded delta
197
198    # Dispatch routine for best timer program (return = scalar, fastest if
199    # an integer but float works too -- and time.process_time() relies on that).
200
201    def trace_dispatch_i(self, frame, event, arg):
202        timer = self.timer
203        t = timer() - self.t - self.bias
204
205        if event == "c_call":
206            self.c_func_name = arg.__name__
207
208        if self.dispatch[event](self, frame, t):
209            self.t = timer()
210        else:
211            self.t = timer() - t  # put back unrecorded delta
212
213    # Dispatch routine for macintosh (timer returns time in ticks of
214    # 1/60th second)
215
216    def trace_dispatch_mac(self, frame, event, arg):
217        timer = self.timer
218        t = timer()/60.0 - self.t - self.bias
219
220        if event == "c_call":
221            self.c_func_name = arg.__name__
222
223        if self.dispatch[event](self, frame, t):
224            self.t = timer()/60.0
225        else:
226            self.t = timer()/60.0 - t  # put back unrecorded delta
227
228    # SLOW generic dispatch routine for timer returning lists of numbers
229
230    def trace_dispatch_l(self, frame, event, arg):
231        get_time = self.get_time
232        t = get_time() - self.t - self.bias
233
234        if event == "c_call":
235            self.c_func_name = arg.__name__
236
237        if self.dispatch[event](self, frame, t):
238            self.t = get_time()
239        else:
240            self.t = get_time() - t # put back unrecorded delta
241
242    # In the event handlers, the first 3 elements of self.cur are unpacked
243    # into vrbls w/ 3-letter names.  The last two characters are meant to be
244    # mnemonic:
245    #     _pt  self.cur[0] "parent time"   time to be charged to parent frame
246    #     _it  self.cur[1] "internal time" time spent directly in the function
247    #     _et  self.cur[2] "external time" time spent in subfunctions
248
249    def trace_dispatch_exception(self, frame, t):
250        rpt, rit, ret, rfn, rframe, rcur = self.cur
251        if (rframe is not frame) and rcur:
252            return self.trace_dispatch_return(rframe, t)
253        self.cur = rpt, rit+t, ret, rfn, rframe, rcur
254        return 1
255
256
257    def trace_dispatch_call(self, frame, t):
258        if self.cur and frame.f_back is not self.cur[-2]:
259            rpt, rit, ret, rfn, rframe, rcur = self.cur
260            if not isinstance(rframe, Profile.fake_frame):
261                assert rframe.f_back is frame.f_back, ("Bad call", rfn,
262                                                       rframe, rframe.f_back,
263                                                       frame, frame.f_back)
264                self.trace_dispatch_return(rframe, 0)
265                assert (self.cur is None or \
266                        frame.f_back is self.cur[-2]), ("Bad call",
267                                                        self.cur[-3])
268        fcode = frame.f_code
269        fn = (fcode.co_filename, fcode.co_firstlineno, fcode.co_name)
270        self.cur = (t, 0, 0, fn, frame, self.cur)
271        timings = self.timings
272        if fn in timings:
273            cc, ns, tt, ct, callers = timings[fn]
274            timings[fn] = cc, ns + 1, tt, ct, callers
275        else:
276            timings[fn] = 0, 0, 0, 0, {}
277        return 1
278
279    def trace_dispatch_c_call (self, frame, t):
280        fn = ("", 0, self.c_func_name)
281        self.cur = (t, 0, 0, fn, frame, self.cur)
282        timings = self.timings
283        if fn in timings:
284            cc, ns, tt, ct, callers = timings[fn]
285            timings[fn] = cc, ns+1, tt, ct, callers
286        else:
287            timings[fn] = 0, 0, 0, 0, {}
288        return 1
289
290    def trace_dispatch_return(self, frame, t):
291        if frame is not self.cur[-2]:
292            assert frame is self.cur[-2].f_back, ("Bad return", self.cur[-3])
293            self.trace_dispatch_return(self.cur[-2], 0)
294
295        # Prefix "r" means part of the Returning or exiting frame.
296        # Prefix "p" means part of the Previous or Parent or older frame.
297
298        rpt, rit, ret, rfn, frame, rcur = self.cur
299        rit = rit + t
300        frame_total = rit + ret
301
302        ppt, pit, pet, pfn, pframe, pcur = rcur
303        self.cur = ppt, pit + rpt, pet + frame_total, pfn, pframe, pcur
304
305        timings = self.timings
306        cc, ns, tt, ct, callers = timings[rfn]
307        if not ns:
308            # This is the only occurrence of the function on the stack.
309            # Else this is a (directly or indirectly) recursive call, and
310            # its cumulative time will get updated when the topmost call to
311            # it returns.
312            ct = ct + frame_total
313            cc = cc + 1
314
315        if pfn in callers:
316            callers[pfn] = callers[pfn] + 1  # hack: gather more
317            # stats such as the amount of time added to ct courtesy
318            # of this specific call, and the contribution to cc
319            # courtesy of this call.
320        else:
321            callers[pfn] = 1
322
323        timings[rfn] = cc, ns - 1, tt + rit, ct, callers
324
325        return 1
326
327
328    dispatch = {
329        "call": trace_dispatch_call,
330        "exception": trace_dispatch_exception,
331        "return": trace_dispatch_return,
332        "c_call": trace_dispatch_c_call,
333        "c_exception": trace_dispatch_return,  # the C function returned
334        "c_return": trace_dispatch_return,
335        }
336
337
338    # The next few functions play with self.cmd. By carefully preloading
339    # our parallel stack, we can force the profiled result to include
340    # an arbitrary string as the name of the calling function.
341    # We use self.cmd as that string, and the resulting stats look
342    # very nice :-).
343
344    def set_cmd(self, cmd):
345        if self.cur[-1]: return   # already set
346        self.cmd = cmd
347        self.simulate_call(cmd)
348
349    class fake_code:
350        def __init__(self, filename, line, name):
351            self.co_filename = filename
352            self.co_line = line
353            self.co_name = name
354            self.co_firstlineno = 0
355
356        def __repr__(self):
357            return repr((self.co_filename, self.co_line, self.co_name))
358
359    class fake_frame:
360        def __init__(self, code, prior):
361            self.f_code = code
362            self.f_back = prior
363
364    def simulate_call(self, name):
365        code = self.fake_code('profile', 0, name)
366        if self.cur:
367            pframe = self.cur[-2]
368        else:
369            pframe = None
370        frame = self.fake_frame(code, pframe)
371        self.dispatch['call'](self, frame, 0)
372
373    # collect stats from pending stack, including getting final
374    # timings for self.cmd frame.
375
376    def simulate_cmd_complete(self):
377        get_time = self.get_time
378        t = get_time() - self.t
379        while self.cur[-1]:
380            # We *can* cause assertion errors here if
381            # dispatch_trace_return checks for a frame match!
382            self.dispatch['return'](self, self.cur[-2], t)
383            t = 0
384        self.t = get_time() - t
385
386
387    def print_stats(self, sort=-1):
388        import pstats
389        pstats.Stats(self).strip_dirs().sort_stats(sort). \
390                  print_stats()
391
392    def dump_stats(self, file):
393        with open(file, 'wb') as f:
394            self.create_stats()
395            marshal.dump(self.stats, f)
396
397    def create_stats(self):
398        self.simulate_cmd_complete()
399        self.snapshot_stats()
400
401    def snapshot_stats(self):
402        self.stats = {}
403        for func, (cc, ns, tt, ct, callers) in self.timings.items():
404            callers = callers.copy()
405            nc = 0
406            for callcnt in callers.values():
407                nc += callcnt
408            self.stats[func] = cc, nc, tt, ct, callers
409
410
411    # The following two methods can be called by clients to use
412    # a profiler to profile a statement, given as a string.
413
414    def run(self, cmd):
415        import __main__
416        dict = __main__.__dict__
417        return self.runctx(cmd, dict, dict)
418
419    def runctx(self, cmd, globals, locals):
420        self.set_cmd(cmd)
421        sys.setprofile(self.dispatcher)
422        try:
423            exec(cmd, globals, locals)
424        finally:
425            sys.setprofile(None)
426        return self
427
428    # This method is more useful to profile a single function call.
429    def runcall(self, func, /, *args, **kw):
430        self.set_cmd(repr(func))
431        sys.setprofile(self.dispatcher)
432        try:
433            return func(*args, **kw)
434        finally:
435            sys.setprofile(None)
436
437
438    #******************************************************************
439    # The following calculates the overhead for using a profiler.  The
440    # problem is that it takes a fair amount of time for the profiler
441    # to stop the stopwatch (from the time it receives an event).
442    # Similarly, there is a delay from the time that the profiler
443    # re-starts the stopwatch before the user's code really gets to
444    # continue.  The following code tries to measure the difference on
445    # a per-event basis.
446    #
447    # Note that this difference is only significant if there are a lot of
448    # events, and relatively little user code per event.  For example,
449    # code with small functions will typically benefit from having the
450    # profiler calibrated for the current platform.  This *could* be
451    # done on the fly during init() time, but it is not worth the
452    # effort.  Also note that if too large a value specified, then
453    # execution time on some functions will actually appear as a
454    # negative number.  It is *normal* for some functions (with very
455    # low call counts) to have such negative stats, even if the
456    # calibration figure is "correct."
457    #
458    # One alternative to profile-time calibration adjustments (i.e.,
459    # adding in the magic little delta during each event) is to track
460    # more carefully the number of events (and cumulatively, the number
461    # of events during sub functions) that are seen.  If this were
462    # done, then the arithmetic could be done after the fact (i.e., at
463    # display time).  Currently, we track only call/return events.
464    # These values can be deduced by examining the callees and callers
465    # vectors for each functions.  Hence we *can* almost correct the
466    # internal time figure at print time (note that we currently don't
467    # track exception event processing counts).  Unfortunately, there
468    # is currently no similar information for cumulative sub-function
469    # time.  It would not be hard to "get all this info" at profiler
470    # time.  Specifically, we would have to extend the tuples to keep
471    # counts of this in each frame, and then extend the defs of timing
472    # tuples to include the significant two figures. I'm a bit fearful
473    # that this additional feature will slow the heavily optimized
474    # event/time ratio (i.e., the profiler would run slower, fur a very
475    # low "value added" feature.)
476    #**************************************************************
477
478    def calibrate(self, m, verbose=0):
479        if self.__class__ is not Profile:
480            raise TypeError("Subclasses must override .calibrate().")
481
482        saved_bias = self.bias
483        self.bias = 0
484        try:
485            return self._calibrate_inner(m, verbose)
486        finally:
487            self.bias = saved_bias
488
489    def _calibrate_inner(self, m, verbose):
490        get_time = self.get_time
491
492        # Set up a test case to be run with and without profiling.  Include
493        # lots of calls, because we're trying to quantify stopwatch overhead.
494        # Do not raise any exceptions, though, because we want to know
495        # exactly how many profile events are generated (one call event, +
496        # one return event, per Python-level call).
497
498        def f1(n):
499            for i in range(n):
500                x = 1
501
502        def f(m, f1=f1):
503            for i in range(m):
504                f1(100)
505
506        f(m)    # warm up the cache
507
508        # elapsed_noprofile <- time f(m) takes without profiling.
509        t0 = get_time()
510        f(m)
511        t1 = get_time()
512        elapsed_noprofile = t1 - t0
513        if verbose:
514            print("elapsed time without profiling =", elapsed_noprofile)
515
516        # elapsed_profile <- time f(m) takes with profiling.  The difference
517        # is profiling overhead, only some of which the profiler subtracts
518        # out on its own.
519        p = Profile()
520        t0 = get_time()
521        p.runctx('f(m)', globals(), locals())
522        t1 = get_time()
523        elapsed_profile = t1 - t0
524        if verbose:
525            print("elapsed time with profiling =", elapsed_profile)
526
527        # reported_time <- "CPU seconds" the profiler charged to f and f1.
528        total_calls = 0.0
529        reported_time = 0.0
530        for (filename, line, funcname), (cc, ns, tt, ct, callers) in \
531                p.timings.items():
532            if funcname in ("f", "f1"):
533                total_calls += cc
534                reported_time += tt
535
536        if verbose:
537            print("'CPU seconds' profiler reported =", reported_time)
538            print("total # calls =", total_calls)
539        if total_calls != m + 1:
540            raise ValueError("internal error: total calls = %d" % total_calls)
541
542        # reported_time - elapsed_noprofile = overhead the profiler wasn't
543        # able to measure.  Divide by twice the number of calls (since there
544        # are two profiler events per call in this test) to get the hidden
545        # overhead per event.
546        mean = (reported_time - elapsed_noprofile) / 2.0 / total_calls
547        if verbose:
548            print("mean stopwatch overhead per profile event =", mean)
549        return mean
550
551#****************************************************************************
552
553def main():
554    import os
555    from optparse import OptionParser
556
557    usage = "profile.py [-o output_file_path] [-s sort] [-m module | scriptfile] [arg] ..."
558    parser = OptionParser(usage=usage)
559    parser.allow_interspersed_args = False
560    parser.add_option('-o', '--outfile', dest="outfile",
561        help="Save stats to <outfile>", default=None)
562    parser.add_option('-m', dest="module", action="store_true",
563        help="Profile a library module.", default=False)
564    parser.add_option('-s', '--sort', dest="sort",
565        help="Sort order when printing to stdout, based on pstats.Stats class",
566        default=-1)
567
568    if not sys.argv[1:]:
569        parser.print_usage()
570        sys.exit(2)
571
572    (options, args) = parser.parse_args()
573    sys.argv[:] = args
574
575    # The script that we're profiling may chdir, so capture the absolute path
576    # to the output file at startup.
577    if options.outfile is not None:
578        options.outfile = os.path.abspath(options.outfile)
579
580    if len(args) > 0:
581        if options.module:
582            import runpy
583            code = "run_module(modname, run_name='__main__')"
584            globs = {
585                'run_module': runpy.run_module,
586                'modname': args[0]
587            }
588        else:
589            progname = args[0]
590            sys.path.insert(0, os.path.dirname(progname))
591            with io.open_code(progname) as fp:
592                code = compile(fp.read(), progname, 'exec')
593            globs = {
594                '__file__': progname,
595                '__name__': '__main__',
596                '__package__': None,
597                '__cached__': None,
598            }
599        try:
600            runctx(code, globs, None, options.outfile, options.sort)
601        except BrokenPipeError as exc:
602            # Prevent "Exception ignored" during interpreter shutdown.
603            sys.stdout = None
604            sys.exit(exc.errno)
605    else:
606        parser.print_usage()
607    return parser
608
609# When invoked as main program, invoke the profiler on a script
610if __name__ == '__main__':
611    main()
612