1"""Print a summary of specialization stats for all files in the
2default stats folders.
3"""
4
5import collections
6import os.path
7import opcode
8from datetime import date
9import itertools
10import argparse
11
# Folder that is scanned for stats files; the location depends on the platform.
DEFAULT_DIR = "c:\\temp\\py_stats\\" if os.name == "nt" else "/tmp/py_stats/"
16
17#Create list of all instruction names
18specialized = iter(opcode._specialized_instructions)
19opname = ["<0>"]
20for name in opcode.opname[1:]:
21    if name.startswith("<"):
22        try:
23            name = next(specialized)
24        except StopIteration:
25            pass
26    opname.append(name)
27
28# opcode_name --> opcode
29# Sort alphabetically.
30opmap = {name: i for i, name in enumerate(opname)}
31opmap = dict(sorted(opmap.items()))
32
# Stat keys whose counts are summed to form the denominator of the
# per-family ratios.
TOTAL = (
    "specialization.deferred",
    "specialization.hit",
    "specialization.miss",
    "execution_count",
)
34
def print_specialization_stats(name, family_stats, defines):
    """Print the specialization report for a single instruction family.

    name: instruction name; used as the section title and handed to
        kind_to_text() when labelling failure kinds.
    family_stats: mapping from stat key to count for this instruction.
    defines: failure-kind table, passed through to kind_to_text().

    Nothing is printed when family_stats has no "specializable" entry or
    when the counters named in TOTAL sum to zero.
    """
    if "specializable" not in family_stats:
        return
    total = sum(family_stats.get(kind, 0) for kind in TOTAL)
    if total == 0:
        return

    def ratio(part, whole):
        # The attempt and failure totals come straight from the stats and
        # may be zero; show an empty cell instead of dividing by zero.
        return f"{100*part/whole:0.1f}%" if whole else ""

    prefix = "specialization."
    attempt_keys = ("specialization.success", "specialization.failure")
    with Section(name, 3, f"specialization stats for {name} family"):
        rows = []
        for key in sorted(family_stats):
            if key.startswith(("specialization.failure_kinds", "pair")):
                continue
            if key in attempt_keys or key == "specializable":
                # Attempts are reported in their own table below.
                continue
            if key in ("specialization.hit", "specialization.miss"):
                label = key[len(prefix):]
            elif key == "execution_count":
                label = "unquickened"
            else:
                label = key
            count = family_stats[key]
            rows.append((f"{label:>12}", f"{count:>12}", ratio(count, total)))
        emit_table(("Kind", "Count", "Ratio"), rows)

        print_title("Specialization attempts", 4)
        total_attempts = sum(family_stats.get(key, 0) for key in attempt_keys)
        rows = []
        for key in attempt_keys:
            label = key[len(prefix):]
            label = label[0].upper() + label[1:]
            val = family_stats.get(key, 0)
            rows.append((label, val, ratio(val, total_attempts)))
        emit_table(("", "Count:", "Ratio:"), rows)

        total_failures = family_stats.get("specialization.failure", 0)
        # Keyed by kind number, so any index found in the stats is accepted.
        failure_kinds = {}
        for key, value in family_stats.items():
            if not key.startswith("specialization.failure_kind"):
                continue
            # Keys look like "specialization.failure_kinds[<index>]".
            _, index = key[:-1].split("[")
            failure_kinds[int(index)] = value
        # Largest count first; kinds that never occurred are left out.
        failures = sorted(
            ((value, index) for index, value in failure_kinds.items() if value),
            reverse=True,
        )
        rows = [
            (kind_to_text(index, defines, name), value, ratio(value, total_failures))
            for value, index in failures
        ]
        emit_table(("Failure kind", "Count:", "Ratio:"), rows)
85
def gather_stats(dirname=None):
    """Sum the counters found in every stats file of a directory.

    dirname: directory to scan; defaults to DEFAULT_DIR.

    Each line is expected to have the form "<key> : <integer>". The key is
    everything before the last colon, so keys may contain colons themselves.
    Blank lines are ignored; lines that cannot be parsed are reported on
    stderr and skipped. Entries of the directory that are not regular files
    are skipped as well.

    Returns a collections.Counter mapping each key to its summed value.
    """
    import sys  # only needed for the warning below

    if dirname is None:
        dirname = DEFAULT_DIR
    stats = collections.Counter()
    for filename in os.listdir(dirname):
        path = os.path.join(dirname, filename)
        if not os.path.isfile(path):
            continue
        with open(path) as fd:
            for line in fd:
                if not line.strip():
                    continue
                key, sep, value = line.rpartition(":")
                try:
                    count = int(value)
                except ValueError:
                    count = None
                if not sep or count is None:
                    print(f"Unparsable line: '{line.strip()}' in {filename}",
                          file=sys.stderr)
                    continue
                stats[key.strip()] += count
    return stats
96
def extract_opcode_stats(stats):
    """Split the "opcode[N]..." entries of *stats* into per-opcode dicts.

    Returns a list of 256 dicts. Entry N maps the part of the key that
    follows "opcode[N]" (without surrounding dots) to the value. Keys that do
    not start with "opcode" are ignored.
    """
    prefix_len = len("opcode[")
    per_opcode = [{} for _ in range(256)]
    for key, value in stats.items():
        if key.startswith("opcode"):
            number, _, field = key[prefix_len:].partition("]")
            per_opcode[int(number)][field.strip(".")] = value
    return per_opcode
105
def parse_kinds(spec_src, prefix="SPEC_FAIL"):
    """Collect the "#define <prefix>_<NAME> <number>" lines of a C source.

    spec_src: iterable of source lines (for example an open file).
    prefix: macro prefix to look for; the prefix and the underscore after it
        are removed from the collected names.

    Returns a defaultdict(list) mapping each number to the names defined with
    that value, in source order. Text after the value (such as a trailing
    comment) is ignored, and defines whose value is not a plain integer are
    skipped.
    """
    defines = collections.defaultdict(list)
    start = f"#define {prefix}_"
    for line in spec_src:
        line = line.strip()
        if not line.startswith(start):
            continue
        fields = line[len(start):].split()
        if len(fields) < 2:
            continue
        name, val = fields[0], fields[1]
        try:
            number = int(val)
        except ValueError:
            # Not a numeric kind (e.g. an expression); nothing to record.
            continue
        defines[number].append(name)
    return defines
116
def pretty(defname):
    """Return *defname* in lower case with underscores turned into spaces."""
    lowered = defname.lower()
    return lowered.replace("_", " ")
119
def kind_to_text(kind, defines, opname):
    """Return a readable label for failure kind number *kind*.

    kind: failure kind number.
    defines: mapping from kind number to the list of names with that value.
    opname: name of the instruction the failure belongs to.

    Kinds below 7 use the first name defined for that number. For the other
    kinds, the first name starting with the family prefix derived from
    *opname* is used, with that prefix removed. If no suitable name exists
    the result is "kind <number>". *defines* is never modified.
    """
    # .get() rather than indexing: indexing a defaultdict would insert an
    # empty entry for every unknown kind.
    names = defines.get(kind, ())
    if kind < 7:
        if names:
            return pretty(names[0])
        return "kind " + str(kind)
    if opname.endswith("ATTR"):
        opname = "ATTR"
    elif opname.endswith("SUBSCR"):
        opname = "SUBSCR"
    elif opname.startswith("PRECALL"):
        opname = "CALL"
    for name in names:
        if name.startswith(opname):
            # +1 also drops the underscore that follows the prefix.
            return pretty(name[len(opname)+1:])
    return "kind " + str(kind)
133
def categorized_counts(opcode_stats):
    """Split the execution counts into three totals.

    Returns the tuple (basic, not_specialized, specialized):
    * instructions with a "specializable" entry and instructions whose name
      contains "ADAPTIVE" count entirely as not specialized;
    * for the other specialized instructions (names without "__"), the
      "specialization.miss" count goes to not specialized and the rest of
      the execution count to specialized;
    * everything else counts as basic.
    Opcodes without an "execution_count" entry are ignored.
    """
    adaptive_names = set()
    specialized_names = set()
    for op in opcode._specialized_instructions:
        if "ADAPTIVE" in op:
            adaptive_names.add(op)
        elif "__" not in op:
            specialized_names.add(op)

    basic = not_specialized = specialized = 0
    for number, stat in enumerate(opcode_stats):
        if "execution_count" not in stat:
            continue
        executed = stat['execution_count']
        name = opname[number]
        if "specializable" in stat or name in adaptive_names:
            not_specialized += executed
        elif name in specialized_names:
            missed = stat.get("specialization.miss", 0)
            not_specialized += missed
            specialized += executed - missed
        else:
            basic += executed
    return basic, not_specialized, specialized
160
def print_title(name, level=2):
    """Print *name* after *level* "#" characters, followed by a blank line."""
    print("#" * level, name, end="\n\n")
164
class Section:
    """Context manager that prints a titled, collapsible block.

    Entering prints the title (through print_title) followed by an opening
    <details> element with a <summary> line; leaving prints the closing
    </details>. Whatever is printed inside the "with" body ends up between
    the two.

    title: heading text.
    level: heading level handed to print_title.
    summary: text for the <summary> line; defaults to the lower-cased title.
    """

    def __init__(self, title, level=2, summary=None):
        self.title = title
        self.level = level
        self.summary = title.lower() if summary is None else summary

    def __enter__(self):
        print_title(self.title, self.level)
        print("<details>")
        print("<summary>", self.summary, "</summary>")
        print()
        return self

    def __exit__(self, *exc_info):
        # Nothing truthy is returned, so an exception raised inside the
        # "with" body still propagates after the block has been closed.
        print()
        print("</details>")
        print()
186
def emit_table(header, rows):
    """Print a pipe-delimited table followed by a blank line.

    header: sequence of column titles. A title ending in ":" is printed
        without the colon and its separator cell becomes "---:" instead of
        "---" (Markdown's marker for a right-aligned column).
    rows: iterable of rows; each cell is converted with str().

    Raises ValueError when a row does not have as many cells as the header;
    rows before the offending one have already been printed at that point.
    """
    width = len(header)
    titles = []
    rules = []
    for item in header:
        rule = "---"
        if item.endswith(":"):
            item = item[:-1]
            rule += ":"
        titles.append(item + " | ")
        rules.append(rule + "|")
    print("|" + "".join(titles))
    print("|" + "".join(rules))
    for row in rows:
        if len(row) != width:
            # Report the offending row only, not the whole table.
            raise ValueError("Wrong number of elements in row '" + str(row) + "'")
        print("|", " | ".join(str(i) for i in row), "|")
    print()
205
def emit_execution_counts(opcode_stats, total):
    """Print the "Execution counts" section.

    One row per opcode that has an "execution_count" entry, largest count
    first, with the share of *total* for the row itself and cumulatively.
    The miss ratio cell is filled only for opcodes that have no
    "specializable" entry and a non-zero "specialization.miss" count.
    """
    with Section("Execution counts", summary="execution counts for all instructions"):
        counts = []
        for number, stat in enumerate(opcode_stats):
            if "execution_count" not in stat:
                continue
            misses = 0 if "specializable" in stat else stat.get("specialization.miss")
            counts.append((stat['execution_count'], opname[number], misses))

        rows = []
        running = 0
        for count, name, misses in sorted(counts, reverse=True):
            running += count
            miss_ratio = f"{100*misses/count:0.1f}%" if misses else ""
            rows.append((
                name,
                count,
                f"{100*count/total:0.1f}%",
                f"{100*running/total:0.1f}%",
                miss_ratio,
            ))
        emit_table(
            ("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:"),
            rows
        )
231
232
def emit_specialization_stats(opcode_stats):
    """Print the "Specialization stats" section, one family after another.

    The failure-kind table is parsed from Python/specialize.c, located
    relative to this file.
    """
    here = os.path.dirname(__file__)
    with open(os.path.join(here, "../../Python/specialize.c")) as spec_src:
        defines = parse_kinds(spec_src)
    with Section("Specialization stats", summary="specialization stats by family"):
        for number, family_stats in enumerate(opcode_stats):
            print_specialization_stats(opname[number], family_stats, defines)
241
def emit_specialization_overview(opcode_stats, total):
    """Print the "Specialization effectiveness" section.

    Shows the basic / not specialized / specialized counts returned by
    categorized_counts(), each with its share of *total*. The ratio cells
    are left empty when *total* is zero (for instance when no stats were
    gathered at all).
    """
    basic, not_specialized, specialized = categorized_counts(opcode_stats)

    def ratio(count):
        # Guard against an empty run, where total is 0.
        return f"{count*100/total:0.1f}%" if total else ""

    with Section("Specialization effectiveness"):
        emit_table(("Instructions", "Count:", "Ratio:"), (
            ("Basic", basic, ratio(basic)),
            ("Not specialized", not_specialized, ratio(not_specialized)),
            ("Specialized", specialized, ratio(specialized)),
        ))
250
def emit_call_stats(stats):
    """Print the "Call stats" section.

    Rows for the keys containing "Calls to" come first, followed by the keys
    starting with "Frame". Every ratio is relative to the sum of the
    "Calls to" counts; the ratio cells are left empty when that sum is zero.
    """
    with Section("Call stats", summary="Inlined calls and frame stats"):
        calls = [(key, value) for key, value in stats.items() if "Calls to" in key]
        frames = [(key, value) for key, value in stats.items() if key.startswith("Frame")]
        total = sum(value for _, value in calls)
        rows = [
            # Frame counters may be present without any recorded call.
            (key, value, f"{100*value/total:0.1f}%" if total else "")
            for key, value in calls + frames
        ]
        emit_table(("", "Count:", "Ratio:"), rows)
265
def emit_object_stats(stats):
    """Print the "Object stats" section.

    One row per key starting with "Object"; the label is the rest of the key
    with its first letter upper-cased. Keys containing "materialize" also
    get a ratio relative to the "Object new values" count; that cell is left
    empty when the count is missing or zero.
    """
    with Section("Object stats", summary="allocations, frees and dict materializations"):
        total = stats.get("Object new values")
        rows = []
        for key, value in stats.items():
            if not key.startswith("Object"):
                continue
            if "materialize" in key and total:
                materialize = f"{100*value/total:0.1f}%"
            else:
                materialize = ""
            label = key[6:].strip()
            # Slicing instead of label[0]: a bare "Object" key has no label.
            label = label[:1].upper() + label[1:]
            rows.append((label, value, materialize))
        emit_table(("",  "Count:", "Ratio:"), rows)
280
def get_total(opcode_stats):
    """Return the sum of the "execution_count" entries of all opcodes."""
    return sum(
        stat['execution_count']
        for stat in opcode_stats
        if "execution_count" in stat
    )
287
def emit_pair_counts(opcode_stats, total):
    """Print the pair-count sections.

    The non-zero "pair_count[N]" entries of every opcode except opcode 0 are
    collected as (count, (opcode, N)). Two sections are printed from them:
    * "Pair counts": the 100 largest pairs with their share of *total*,
      individually and cumulatively;
    * "Predecessor/Successor Pairs": for each opcode occurring in a pair, its
      three most common predecessors and successors.
    """
    prefix_len = len("pair_count[")
    pair_counts = []
    for first, stat in enumerate(opcode_stats):
        if first == 0:
            continue
        for key, value in stat.items():
            if value and key.startswith("pair_count"):
                second, _, _ = key[prefix_len:].partition("]")
                pair_counts.append((value, (first, int(second))))

    with Section("Pair counts", summary="Pair counts for top 100 pairs"):
        pair_counts.sort(reverse=True)
        running = 0
        rows = []
        for count, (first, second) in pair_counts[:100]:
            running += count
            rows.append((
                opname[first] + " " + opname[second],
                count,
                f"{100*count/total:0.1f}%",
                f"{100*running/total:0.1f}%",
            ))
        emit_table(("Pair", "Count:", "Self:", "Cumulative:"),
            rows
        )

    with Section("Predecessor/Successor Pairs", summary="Top 3 predecessors and successors of each opcode"):
        predecessors = collections.defaultdict(collections.Counter)
        successors = collections.defaultdict(collections.Counter)
        total_predecessors = collections.Counter()
        total_successors = collections.Counter()
        for count, (first, second) in pair_counts:
            if not count:
                continue
            predecessors[second][first] = count
            successors[first][second] = count
            total_predecessors[second] += count
            total_successors[first] += count

        for name, number in opmap.items():
            pred_total = total_predecessors[number]
            succ_total = total_successors[number]
            if not (pred_total or succ_total):
                continue
            pred_rows = [
                (opname[pred], count, f"{count/pred_total:.1%}")
                for (pred, count) in predecessors[number].most_common(3)
            ] if pred_total else ()
            succ_rows = [
                (opname[succ], count, f"{count/succ_total:.1%}")
                for (succ, count) in successors[number].most_common(3)
            ] if succ_total else ()
            with Section(name, 3, f"Successors and predecessors for {name}"):
                emit_table(("Predecessors", "Count:", "Percentage:"),
                    pred_rows
                )
                emit_table(("Successors", "Count:", "Percentage:"),
                    succ_rows
                )
340
def main():
    """Gather the stats, print every report section, then a dated footer."""
    stats = gather_stats()
    opcode_stats = extract_opcode_stats(stats)
    total = get_total(opcode_stats)
    # (emitter, arguments) pairs; the sections are printed in this order.
    sections = (
        (emit_execution_counts, (opcode_stats, total)),
        (emit_pair_counts, (opcode_stats, total)),
        (emit_specialization_stats, (opcode_stats,)),
        (emit_specialization_overview, (opcode_stats, total)),
        (emit_call_stats, (stats,)),
        (emit_object_stats, (stats,)),
    )
    for emit, args in sections:
        emit(*args)
    print("---")
    print("Stats gathered on:", date.today())


# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
356