import os
import os.path as osp
from os.path import expanduser as expu
import re
from typing import Optional, Union


def to_num(x: str) -> Union[int, float]:
    """Parse a matched stat value as int or float."""
    if '.' in x:
        return float(x)
    return int(x)


def xs_get_stats(stat_file: str, targets: dict) -> Optional[dict]:
    """Extract stats from a simulator output file.

    `targets` maps a stat name to a regex with one capture group, or to a
    (regex, count) pair whose last `count` matches are summed.
    """
    if not os.path.isfile(expu(stat_file)):
        print(stat_file)
    assert os.path.isfile(expu(stat_file))

    with open(stat_file, encoding='utf-8') as f:
        lines = f.read().splitlines()

    if not lines:
        return None

    patterns = {}
    accumulate_table = {}  # key: stat name, value: (count, [matched values])
    for k, p in targets.items():
        if isinstance(p, str):
            patterns[k] = re.compile(p)
        else:
            patterns[k] = re.compile(p[0])
            accumulate_table[k] = (p[1], [])
    stats = {}

    for line in lines:
        for k, pattern in patterns.items():
            m = pattern.search(line)
            if m is not None:
                if k in accumulate_table:
                    accumulate_table[k][1].append(to_num(m.group(1)))
                else:
                    stats[k] = to_num(m.group(1))
                break
    for k, accumulate in accumulate_table.items():
        # sum only the last `count` matches of each accumulated stat
        stats[k] = sum(accumulate[1][-accumulate[0]:])

    desired_keys = set(patterns.keys())
    obtained_keys = set(stats.keys())
    not_found_keys = desired_keys - obtained_keys
    if not_found_keys:
        print(stat_file)
        print(targets)
        print(not_found_keys)
    assert len(not_found_keys) == 0

    stats['ipc'] = stats['commitInstr'] / stats['total_cycles']
    return stats


def workload_point_frompath(path):
    """Infer (workload, point, level) from a path relative to the results root."""
    split_path = path.split('/')[0].split('_')
    second_layer = path.split('/')[1]
    level = 1
    if second_layer.isdigit() and len(second_layer) > 1:  # workload/point/ ; work_load/point/
        workload = path.split('/')[0]
        point = second_layer
        level = 2
    elif len(split_path) == 3 and split_path[1].isdigit():  # workload_point_xxx/
        workload = split_path[0]
        point = split_path[1]
    elif len(split_path) == 4 and split_path[2].isdigit():  # work_load_point_xxx/
        workload = split_path[0] + '_' + split_path[1]
        point = split_path[2]
    else:
        workload = path.split('/')[0]
        point = '0'
        level = 1

    return workload, point, level


def strip_path(file_path: str, prefix_path: str):
    """Return `file_path` with `prefix_path` and any resulting leading '/' removed."""
    x = prefix_path.join(file_path.split(prefix_path)[1:])
    if prefix_path.startswith('.') and x.startswith('/'):
        x = x[1:]
    if prefix_path.startswith('/') and x.startswith('/'):
        x = x[1:]
    return x


def glob_stats(path: str, fname='x'):
    files = []
    # check for checkpoint conflicts
    files_map = {}

    probe_stat_path = find_file_in_maze(path, fname)  # use it to probe the directory layout
    workload, point, segments = workload_point_frompath(strip_path(probe_stat_path, path))
    for l2_dir in os.listdir(path):
        l2_path = osp.join(path, l2_dir)
        if not osp.isdir(l2_path):
            # skip stray files at the top level
            continue
        # workload/point
        if segments == 2:
            # two-layer directory
            for l3_dir in os.listdir(l2_path):
                l3_path = osp.join(l2_path, l3_dir)
                if not osp.isdir(l3_path):
                    continue
                stat_path = find_file_in_maze(l3_path, fname)
                if stat_path is not None:
                    workload, point, _ = workload_point_frompath(strip_path(stat_path, path))
                    point_identifier = workload + '_' + point
                    files_map.update({point_identifier: stat_path})
                    files.append((point_identifier, stat_path))
        else:
            # workload_point_xx/
            stat_path = find_file_in_maze(l2_path, fname)
            if stat_path is not None:
                workload, point, _ = workload_point_frompath(strip_path(stat_path, path))
                point_identifier = workload + '_' + point
                files_map.update({point_identifier: stat_path})
                files.append((point_identifier, stat_path))
    return files


def find_file_in_maze(path: str, stat_file='stats.txt'):
    file_path = osp.join(path, stat_file)
    if osp.isfile(file_path) or osp.islink(file_path):
        return file_path
    if not osp.isdir(path):
        return None
    for l2_dir in os.listdir(path):
        l2_path = osp.join(path, l2_dir)
        if not osp.isdir(l2_path):
            continue
        ret = find_file_in_maze(l2_path, stat_file)
        if ret is not None:
            return ret
    return None
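

# Example usage: a minimal sketch of how glob_stats and xs_get_stats fit together.
# The results directory, stat file name, and regex formats below are hypothetical
# and not part of this module; adapt them to the actual simulator output.
if __name__ == '__main__':
    # Each target maps a stat name to a regex with one capture group
    # (or to a (regex, count) pair to sum the last `count` matches).
    # 'commitInstr' and 'total_cycles' are required because xs_get_stats
    # derives 'ipc' from them.
    example_targets = {
        'commitInstr': r'commitInstr,\s*(\d+)',    # hypothetical line format
        'total_cycles': r'total_cycles,\s*(\d+)',  # hypothetical line format
    }
    results_root = '/path/to/results'  # hypothetical results directory (absolute path)
    for point_identifier, stat_path in glob_stats(results_root, fname='simulator_err.txt'):
        stats = xs_get_stats(stat_path, example_targets)
        if stats:
            print(point_identifier, stats['ipc'])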