xref: /XiangShan/scripts/top-down/top_down.py (revision f24210142010f7e7a701ca25e9fc1aa15595843e)
1effccb7dSTang Haojinfrom multiprocessing import Process, Manager
2effccb7dSTang Haojinimport threading
3effccb7dSTang Haojinimport os.path as osp
4effccb7dSTang Haojinimport os
5effccb7dSTang Haojinimport resource
6effccb7dSTang Haojinimport json
7effccb7dSTang Haojinimport argparse
8effccb7dSTang Haojinimport psutil
9effccb7dSTang Haojinimport numpy as np
10effccb7dSTang Haojinimport pandas as pd
11effccb7dSTang Haojinimport utils as u
12effccb7dSTang Haojinimport configs as cf
13effccb7dSTang Haojinfrom draw import draw
14eb163ef0SHaojin Tang
15eb163ef0SHaojin Tang
def batch():
    """Extract stats from every finished simulation job in parallel and dump
    the merged per-workload metrics into a single CSV at ``cf.CSV_PATH``.

    A job is considered finished only when its ``simulator_out.txt`` contains
    either 'HIT GOOD TRAP' or 'EXCEEDING CYCLE/INSTR LIMIT'.
    """
    paths = u.glob_stats(cf.stats_dir, fname='simulator_err.txt')

    manager = Manager()
    all_bmk_dict = manager.dict()

    # NOTE: a threading.Semaphore is NOT shared across fork()ed Processes --
    # each child would inherit its own private copy, so the concurrency cap
    # would never take effect.  A manager-backed semaphore is a proxy shared
    # by all worker processes, which actually limits parallelism to the
    # number of logical CPUs.
    semaphore = manager.Semaphore(os.cpu_count())

    def extract_and_post_process(global_dict, workload, path):
        """Worker: parse one job's stats and store them keyed by workload."""
        with semaphore:
            flag_file = osp.join(osp.dirname(path), 'simulator_out.txt')
            with open(flag_file, encoding='utf-8') as f:
                contents = f.read()
                if 'EXCEEDING CYCLE/INSTR LIMIT' not in contents and 'HIT GOOD TRAP' not in contents:
                    print('Skip unfinished job:', workload)
                    return

            print('Process finished job:', workload)

            d = u.xs_get_stats(path, cf.targets)
            if len(d):
                # add bmk and point after topdown processing
                # workload is named "<bmk>_..._<point>"
                segments = workload.split('_')
                if len(segments):
                    d['point'] = segments[-1]
                    d['workload'] = '_'.join(segments[:-1])
                    d['bmk'] = segments[0]

            global_dict[workload] = d

    jobs = [Process(target=extract_and_post_process, args=(
        all_bmk_dict, workload, path)) for workload, path in paths]
    for p in jobs:
        p.start()
    for p in jobs:
        p.join()

    df = pd.DataFrame.from_dict(all_bmk_dict, orient='index')
    df = df.sort_index()
    df = df.reindex(sorted(df.columns), axis=1)

    # A workload missing a counter contributes 0 for that column.
    df = df.fillna(0)

    df.to_csv(cf.CSV_PATH, index=True)
62effccb7dSTang Haojin
def proc_input(wl_df: pd.DataFrame, js: dict, workload: str):
    """Reduce the per-point rows of one workload to a single weighted row.

    The weighted metrics are a matrix product:
        weighted (1, N) = vec_weight (1, W) @ matrix_perf (W, N)
    Both the performance matrix rows and the weight vector entries are
    ordered by simulation point so they line up.

    Returns a tuple ``(values, columns)`` of the one-row result; the extra
    'coverage' column records the un-normalized weight sum.
    """
    assert isinstance(wl_df.iloc[0]['point'], np.int64)
    ordered = wl_df.sort_values(by=['point'])
    wl_info = dict(js[workload])
    ordered['cpi'] = 1.0 / ordered['ipc']

    # Point weights come from the json; keys are point ids as strings.
    weights = pd.DataFrame.from_dict(wl_info['points'], orient='index')
    weights.index = weights.index.astype(np.int64)
    # Keep only the points that were actually simulated, in row order.
    weights = weights.loc[ordered['point']]
    weights.columns = ['weight']
    weights['weight'] = weights['weight'].astype(np.float64)

    # Remember raw coverage, then re-normalize so the kept weights sum to 1.
    coverage = np.sum(weights.values)
    weights = weights / coverage

    # Strip auxiliary columns that must not be weighted.
    aux = {'bmk', 'point', 'workload', 'ipc'} & set(ordered.columns.to_list())
    ordered = ordered.drop(aux, axis=1)

    row = np.matmul(weights.values.reshape(1, -1), ordered.values)
    result = pd.DataFrame(row, columns=ordered.columns)
    # Coverage is set after the matmul so it is not itself weighted.
    result['coverage'] = coverage
    return result.values, result.columns
98effccb7dSTang Haojin
def proc_bmk(bmk_df: pd.DataFrame, js: dict):
    """Combine all inputs (workloads) of one benchmark into a weighted row.

    Each workload is first reduced with :func:`proc_input`; the per-workload
    rows are then weighted by their instruction counts from *js* using the
    same matrix-product scheme.

    Returns a tuple ``(values, columns)`` of the one-row result.
    """
    workloads = bmk_df['workload'].unique()
    metric_list = []
    for wl in workloads:
        metrics, cols = proc_input(bmk_df[bmk_df['workload'] == wl], js, wl)
        metric_list.append(metrics)
    metrics = np.concatenate(metric_list, axis=0)
    metrics = pd.DataFrame(metrics, columns=cols)

    # Instruction counts serve as the per-workload weights.  (A previous
    # guard `workload.startswith(workload)` was a tautology -- always true --
    # so every workload of this benchmark contributes.)
    input_dict = {workload: int(js[workload]['insts']) for workload in workloads}
    input_insts = pd.DataFrame.from_dict(
        input_dict, orient='index', columns=['insts'])
    # Normalize the weights so they sum to 1.0.
    vec_weight = input_insts / np.sum(input_insts.values)
    weight_metric = np.matmul(vec_weight.values.reshape(1, -1), metrics.values)
    return weight_metric, metrics.columns
121effccb7dSTang Haojin
def compute_weighted_metrics():
    """Aggregate the per-point CSV into one weighted row per benchmark.

    Benchmarks may be filtered to the SPEC06 int/fp subsets via
    ``cf.INT_ONLY`` / ``cf.FP_ONLY``; the result is sorted (by cpi when that
    column exists, else by index) and written to ``cf.OUT_CSV``.
    """
    df = pd.read_csv(cf.CSV_PATH, index_col=0)
    with open(cf.JSON_FILE, 'r', encoding='utf-8') as f:
        js = json.load(f)

    weighted = {}
    for bmk in df['bmk'].unique():
        # Honor the int-only / fp-only switches.
        if cf.INT_ONLY and bmk not in cf.spec_bmks['06']['int']:
            continue
        if cf.FP_ONLY and bmk not in cf.spec_bmks['06']['float']:
            continue
        df_bmk = df[df['bmk'] == bmk]
        workloads = df_bmk['workload'].unique()
        # A single-input benchmark skips the cross-input weighting step.
        if len(workloads) == 1:
            metrics, cols = proc_input(df_bmk, js, workloads[0])
        else:
            metrics, cols = proc_bmk(df_bmk, js)
        weighted[bmk] = metrics[0]

    weighted_df = pd.DataFrame.from_dict(weighted, orient='index', columns=cols)
    if 'cpi' in weighted_df.columns:
        weighted_df = weighted_df.sort_values(by='cpi', ascending=False)
    else:
        weighted_df = weighted_df.sort_index()
    weighted_df.to_csv(cf.OUT_CSV)
148eb163ef0SHaojin Tang
149eb163ef0SHaojin Tang
if __name__ == '__main__':
    parser = argparse.ArgumentParser(usage='generate top-down results')
    parser.add_argument('-s', '--stat-dir', action='store', required=True,
                        help='stat output directory')
    # --json is optional: marking it required would make the default below
    # dead code.
    parser.add_argument('-j', '--json', action='store',
                        help='specify json file',
                        default='resources/spec06_rv64gcb_o2_20m.json')
    opt = parser.parse_args()
    cf.stats_dir = opt.stat_dir
    cf.JSON_FILE = opt.json
    os.makedirs('results', exist_ok=True)

    # Raise the soft open-file limit: batch() may hold a stats file per
    # workload.  Leave the hard limit alone -- lowering an unlimited hard
    # limit or exceeding a low unprivileged one would raise ValueError.
    soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
    if soft <= 8192:
        try:
            resource.setrlimit(resource.RLIMIT_NOFILE, (8192, hard))
        except ValueError:
            # Hard limit is below 8192 and we cannot raise it; use the max
            # allowed instead of crashing.
            resource.setrlimit(resource.RLIMIT_NOFILE, (hard, hard))

    batch()
    compute_weighted_metrics()
    draw()
167