xref: /XiangShan/scripts/top-down/utils.py (revision effccb7d09c4946a549cc139dc1b99ed9e726752)
1*effccb7dSTang Haojinimport os
2*effccb7dSTang Haojinimport os.path as osp
3*effccb7dSTang Haojinfrom os.path import expanduser as expu
4*effccb7dSTang Haojinimport re
5*effccb7dSTang Haojin
6*effccb7dSTang Haojin
def to_num(x: str) -> (int, float):
    """Convert a numeric string to a number.

    Args:
        x: Textual number, e.g. ``'42'``, ``'3.14'`` or ``'1e3'``.

    Returns:
        A ``float`` when ``x`` contains a ``'.'``; otherwise an ``int`` when
        ``x`` parses as an integer, falling back to ``float`` for spellings
        without a dot that only ``float()`` accepts (such as ``'1e3'``).

    Raises:
        ValueError: If ``x`` is not a valid number in either form.
    """
    if '.' in x:
        return float(x)
    try:
        return int(x)
    except ValueError:
        # No dot, yet not an integer literal: exponent notation such as
        # '1e3' lands here. float() raises ValueError if it is not a number.
        return float(x)
11*effccb7dSTang Haojin
12*effccb7dSTang Haojin
def xs_get_stats(stat_file: str, targets: dict) -> dict:
    """Extract selected counters from a stats file with regular expressions.

    Args:
        stat_file: Path of the stats file; a leading ``~`` is expanded.
        targets: Mapping from result key to either a regex string or an
            indexable ``(regex, count)`` pair. In both forms capture group 1
            of the regex must hold the number.

            * plain regex: the value of the last matching line is kept;
            * ``(regex, count)``: every match is collected and the last
              ``count`` collected values are summed.

    Returns:
        Dict with one number per key of ``targets`` plus ``'ipc'``, computed
        as ``stats['commitInstr'] / stats['total_cycles']``.

    Raises:
        AssertionError: If ``stat_file`` does not exist or a plain-regex key
            never matched any line.
        KeyError: If ``'commitInstr'`` or ``'total_cycles'`` is not among the
            extracted keys.
    """
    stat_path = expu(stat_file)
    if not os.path.isfile(stat_path):
        print(stat_file)
        raise AssertionError('stats file not found: {}'.format(stat_file))
    # Open the expanded path, i.e. the same file the check above looked at.
    with open(stat_path, encoding='utf-8') as f:
        lines = f.read().splitlines()

    patterns = {}
    accumulate_table = {}  # key: target key, value: (count, [matched values])
    for k, p in targets.items():
        if isinstance(p, str):
            patterns[k] = re.compile(p)
        else:
            patterns[k] = re.compile(p[0])
            accumulate_table[k] = (p[1], [])
    stats = {}

    for line in lines:
        for k, pattern in patterns.items():
            m = pattern.search(line)
            if m is None:
                continue
            value = to_num(m.group(1))
            if k in accumulate_table:
                accumulate_table[k][1].append(value)
            else:
                stats[k] = value
            # A line is consumed by the first pattern that matches it.
            break

    for k, (count, values) in accumulate_table.items():
        # Sum only the most recent `count` matches. Note that a count of 0
        # slices as [-0:], which is the whole list.
        stats[k] = sum(values[-count:])

    not_found_keys = set(patterns) - set(stats)
    if not_found_keys:
        print(stat_file)
        print(targets)
        print(not_found_keys)
        raise AssertionError(
            'targets not found in {}: {}'.format(stat_file, sorted(not_found_keys)))

    stats['ipc'] = stats['commitInstr'] / stats['total_cycles']
    return stats
57*effccb7dSTang Haojin
58*effccb7dSTang Haojin
def workload_point_frompath(path):
    """Derive ``(workload, point, level)`` from a relative result path.

    The path is split on ``'/'``; its first two components decide the layout:

    * ``workload/point/...`` -- the second component is an all-digit string
      longer than one character: level 2, the first component is the workload.
    * ``workload_point_xxx/...`` -- the first component has three
      ``'_'``-separated fields and the middle one is all digits: level 1.
    * ``work_load_point_xxx/...`` -- four fields with an all-digit third
      field: level 1, the workload is the first two fields joined by ``'_'``.
    * anything else: the first component is the workload, the point is
      ``'0'`` and the level is 1.

    Raises:
        IndexError: If ``path`` contains no ``'/'``.
    """
    components = path.split('/')
    top_dir = components[0]
    second_layer = components[1]
    fields = top_dir.split('_')

    # workload/point/ ; work_load/point/
    if len(second_layer) > 1 and second_layer.isdigit():
        return top_dir, second_layer, 2

    # workload_point_xxx/
    if len(fields) == 3 and fields[1].isdigit():
        return fields[0], fields[1], 1

    # work_load_point_xxx/
    if len(fields) == 4 and fields[2].isdigit():
        return '_'.join(fields[:2]), fields[2], 1

    return top_dir, '0', 1
79*effccb7dSTang Haojin
80*effccb7dSTang Haojin
def strip_path(file_path: str, prefix_path: str):
    """Return what follows the first occurrence of ``prefix_path`` in ``file_path``.

    Later occurrences of ``prefix_path`` are kept in the result. One leading
    ``'/'`` is dropped from the result, but only when ``prefix_path`` itself
    begins with ``'.'`` or ``'/'``. An empty string is returned when
    ``prefix_path`` does not occur in ``file_path``.
    """
    _, *tail = file_path.split(prefix_path)
    remainder = prefix_path.join(tail)
    if remainder.startswith('/') and prefix_path[:1] in ('.', '/'):
        remainder = remainder[1:]
    return remainder
88*effccb7dSTang Haojin
89*effccb7dSTang Haojin
def glob_stats(path: str, fname='x'):
    """Collect the stat file of every run found under ``path``.

    The first stat file found anywhere below ``path`` is used to probe the
    directory layout: either ``workload/point/...`` (two directory levels per
    run) or a single directory per run such as ``workload_point_xxx/...``.

    Args:
        path: Root directory holding the results.
        fname: File name of the stat file to look for in each run directory.

    Returns:
        List of ``(point_identifier, stat_path)`` tuples, where
        ``point_identifier`` is ``workload + '_' + point``. The list is empty
        when no stat file exists under ``path``. Entries follow
        ``os.listdir`` order, and runs that map to the same identifier are
        all returned (no conflict check is performed).
    """
    probe_stat_path = find_file_in_maze(path, fname)  # use it to probe the directory layout
    if probe_stat_path is None:
        # Nothing to collect; without a probe the layout cannot be determined.
        return []
    _, _, segments = workload_point_frompath(strip_path(probe_stat_path, path))

    files = []
    for l2_dir in os.listdir(path):
        l2_path = osp.join(path, l2_dir)
        if segments == 2:
            # workload/point: every sub-directory of a workload is one run.
            if not osp.isdir(l2_path):
                # Plain files next to the workload directories are not runs.
                continue
            run_dirs = [osp.join(l2_path, l3_dir) for l3_dir in os.listdir(l2_path)]
            run_dirs = [run_dir for run_dir in run_dirs if osp.isdir(run_dir)]
        else:
            # workload_point_xx/: the entry itself is the run directory.
            run_dirs = [l2_path]

        for run_dir in run_dirs:
            stat_path = find_file_in_maze(run_dir, fname)
            if stat_path is None:
                continue
            workload, point, _ = workload_point_frompath(strip_path(stat_path, path))
            files.append((workload + '_' + point, stat_path))
    return files
121*effccb7dSTang Haojin
122*effccb7dSTang Haojin
def find_file_in_maze(path: str, stat_file='stats.txt'):
    """Search ``path`` depth-first for a file named ``stat_file``.

    ``path`` itself is checked first; its sub-directories are then searched
    recursively in ``os.listdir`` order and the first hit is returned.

    Args:
        path: Directory at which the search starts.
        stat_file: File name to look for.

    Returns:
        The path of the first regular file or symlink named ``stat_file``
        that is found, or ``None`` when there is none (including the case
        where ``path`` is not a directory).
    """
    candidate = osp.join(path, stat_file)
    if osp.islink(candidate) or osp.isfile(candidate):
        return candidate

    if osp.isdir(path):
        for entry in os.listdir(path):
            sub_dir = osp.join(path, entry)
            if osp.isdir(sub_dir):
                found = find_file_in_maze(sub_dir, stat_file)
                if found is not None:
                    return found
    return None
137