xref: /XiangShan/scripts/top-down/utils.py (revision effccb7d09c4946a549cc139dc1b99ed9e726752)
1import os
2import os.path as osp
3from os.path import expanduser as expu
4import re
5
6
def to_num(x: str) -> "int | float":
    """Convert a numeric string to an ``int`` or a ``float``.

    Plain integer literals (``'42'``) are returned as ``int``. Anything else
    that ``float()`` accepts -- decimal notation (``'0.5'``), exponent
    notation without a dot (``'1e3'``), ``'inf'`` / ``'nan'`` -- is returned
    as ``float``.

    Args:
        x: Textual representation of a number.

    Returns:
        The parsed value, as ``int`` when possible and ``float`` otherwise.

    Raises:
        ValueError: If ``x`` is neither a valid int nor a valid float literal.
    """
    try:
        return int(x)
    except ValueError:
        # int() rejects every string containing '.', so dotted input still
        # ends up in float(); this fallback additionally covers exponent
        # forms such as '1e3' that carry no '.' at all.
        return float(x)
11
12
def xs_get_stats(stat_file: str, targets: dict) -> dict:
    """Extract numeric statistics from a text file using regular expressions.

    Every line of ``stat_file`` is tested against the patterns in
    ``targets`` in dictionary order; only the first pattern that matches a
    line consumes it. The first capture group of the match is converted
    with ``to_num``.

    Args:
        stat_file: Path of the statistics file; a leading ``~`` is expanded.
        targets: Maps a result key to either

            * a regex string -- the value of the last matching line is kept, or
            * a sequence ``(regex, count)`` -- every match is collected and
              the sum of the last ``count`` collected values is stored.

            Each regex must contain at least one capture group. ``targets``
            has to provide ``'commitInstr'`` and ``'total_cycles'`` because
            they are used to derive ``'ipc'``.

    Returns:
        A dict with one entry per key in ``targets`` plus ``'ipc'``
        (``commitInstr / total_cycles``).

    Raises:
        AssertionError: If ``stat_file`` is not a file, or if some plain
            (non-accumulated) target never matched.
        KeyError: If ``'commitInstr'`` or ``'total_cycles'`` is not in the
            collected stats.
        ZeroDivisionError: If ``total_cycles`` is zero.
    """
    # Expand '~' once and use the same path for both the check and open().
    stat_path = expu(stat_file)
    if not os.path.isfile(stat_path):
        print(stat_file)
        # AssertionError is kept as the exception type for existing callers,
        # but raised explicitly so the check survives ``python -O``.
        raise AssertionError('stat file not found: {}'.format(stat_file))

    with open(stat_path, encoding='utf-8') as f:
        lines = f.read().splitlines()

    patterns = {}
    accumulate_table = {}  # key -> (count, [matched values])
    for k, p in targets.items():
        if isinstance(p, str):
            patterns[k] = re.compile(p)
        else:
            patterns[k] = re.compile(p[0])
            accumulate_table[k] = (p[1], [])

    stats = {}
    for line in lines:
        for k, pattern in patterns.items():
            m = pattern.search(line)
            if m is None:
                continue
            value = to_num(m.group(1))
            if k in accumulate_table:
                accumulate_table[k][1].append(value)
            else:
                stats[k] = value
            # A line is attributed to the first pattern that matches it.
            break

    # Iterate over items(): iterating the dict itself yields only the keys.
    for k, (count, values) in accumulate_table.items():
        # Sum of the last ``count`` matches (a count of 0 selects all of
        # them, since ``values[-0:]`` is the whole list).
        stats[k] = sum(values[-count:])

    not_found_keys = set(patterns) - set(stats)
    if not_found_keys:
        print(stat_file)
        print(targets)
        print(not_found_keys)
        raise AssertionError(
            'targets not found in {}: {}'.format(stat_file, sorted(not_found_keys)))

    stats['ipc'] = stats['commitInstr'] / stats['total_cycles']
    return stats
57
58
def workload_point_frompath(path):
    """Derive ``(workload, point, level)`` from a relative result path.

    Recognised layouts, checked in this order:

    * ``workload/point/...`` -- the second path component consists of more
      than one digit; ``level`` is 2.
    * ``workload_point_xxx/...`` -- the first component has three
      ``_``-separated parts and the middle one is numeric.
    * ``work_load_point_xxx/...`` -- the first component has four
      ``_``-separated parts and the third one is numeric.
    * anything else -- the whole first component is the workload and the
      point defaults to ``'0'``.

    Args:
        path: ``/``-separated path relative to the results root. A path with
            a single component (no ``/``) is accepted and treated as having
            no second layer.

    Returns:
        Tuple ``(workload, point, level)`` where ``workload`` and ``point``
        are strings and ``level`` is 2 for the two-directory layout and 1
        otherwise.
    """
    components = path.split('/')
    first_layer = components[0]
    # A bare directory name has no second component; the empty string fails
    # the digit test below and falls through to the single-layer layouts.
    second_layer = components[1] if len(components) > 1 else ''

    if second_layer.isdigit() and len(second_layer) > 1:  # workload/point/ ; work_load/point/
        return first_layer, second_layer, 2

    name_parts = first_layer.split('_')
    if len(name_parts) == 3 and name_parts[1].isdigit():  # workload_point_xxx/
        return name_parts[0], name_parts[1], 1
    if len(name_parts) == 4 and name_parts[2].isdigit():  # work_load_point_xxx/
        return name_parts[0] + '_' + name_parts[1], name_parts[2], 1

    return first_layer, '0', 1
79
80
def strip_path(file_path: str, prefix_path: str):
    """Return the part of ``file_path`` that follows ``prefix_path``.

    Everything up to and including the first occurrence of ``prefix_path``
    is dropped. If ``prefix_path`` does not occur in ``file_path`` the
    result is the empty string. One leading ``/`` is removed from the
    remainder, but only when ``prefix_path`` itself starts with ``.`` or
    ``/``.

    Raises:
        ValueError: If ``prefix_path`` is the empty string.
    """
    _, found, remainder = file_path.partition(prefix_path)
    if not found:
        return ''
    if prefix_path.startswith(('.', '/')) and remainder.startswith('/'):
        return remainder[1:]
    return remainder
88
89
def glob_stats(path: str, fname='x'):
    """Collect the stat files found below ``path``, one per workload point.

    One stat file is located first and its path relative to ``path`` is used
    to detect the directory layout via ``workload_point_frompath``:

    * two-layer (``workload/point/...``): every sub-directory of every
      directory directly under ``path`` is searched;
    * otherwise (``workload_point_xxx/...``): every entry directly under
      ``path`` is searched.

    Args:
        path: Root directory of the results.
        fname: Name of the stat file to look for.

    Returns:
        A list of ``(point_identifier, stat_path)`` tuples where
        ``point_identifier`` is ``workload + '_' + point``. Entries appear
        in ``os.listdir`` order; duplicate identifiers are not filtered.
        The list is empty when no stat file exists below ``path``.
    """
    # Use one stat file to probe the directory layout.
    probe_stat_path = find_file_in_maze(path, fname)
    if probe_stat_path is None:
        # Nothing to collect; avoids handing None to strip_path().
        return []
    _, _, segments = workload_point_frompath(strip_path(probe_stat_path, path))

    files = []
    for l2_dir in os.listdir(path):
        l2_path = osp.join(path, l2_dir)
        if segments == 2:
            # workload/point/: search each point directory separately.
            if not osp.isdir(l2_path):
                # Stray files next to the workload directories cannot be listed.
                continue
            candidates = (osp.join(l2_path, l3_dir) for l3_dir in os.listdir(l2_path))
            search_roots = [l3_path for l3_path in candidates if osp.isdir(l3_path)]
        else:
            # workload_point_xx/: the entry itself is the search root.
            search_roots = [l2_path]

        for search_root in search_roots:
            stat_path = find_file_in_maze(search_root, fname)
            if stat_path is None:
                continue
            workload, point, _ = workload_point_frompath(strip_path(stat_path, path))
            files.append((workload + '_' + point, stat_path))
    return files
121
122
def find_file_in_maze(path: str, stat_file='stats.txt'):
    """Search ``path`` depth-first for a file named ``stat_file``.

    ``path`` itself is checked first; afterwards each sub-directory is
    searched recursively in ``os.listdir`` order and the first hit wins.

    Args:
        path: Directory at which the search starts.
        stat_file: File name to look for.

    Returns:
        The path of the first regular file or symbolic link found, or
        ``None`` when ``path`` is not a directory or holds no such file.
    """
    candidate = osp.join(path, stat_file)
    if osp.isfile(candidate) or osp.islink(candidate):
        return candidate

    if osp.isdir(path):
        for entry in os.listdir(path):
            sub_path = osp.join(path, entry)
            if osp.isdir(sub_path):
                found = find_file_in_maze(sub_path, stat_file)
                if found is not None:
                    return found
    return None
137