from multiprocessing import Process, Manager, Semaphore
import os.path as osp
import os
import resource
import json
import argparse
import psutil
import numpy as np
import pandas as pd
import utils as u
import configs as cf
from draw import draw


def batch():
    paths = u.glob_stats(cf.stats_dir, fname='simulator_err.txt')

    manager = Manager()
    all_bmk_dict = manager.dict()

    # Use a multiprocessing.Semaphore so the limit is shared across the forked
    # workers; a threading.Semaphore would be copied into each child process
    # and never actually bound the concurrency.
    semaphore = Semaphore(psutil.cpu_count())

    def extract_and_post_process(global_dict, workload, path):
        with semaphore:
            flag_file = osp.join(osp.dirname(path), 'simulator_out.txt')
            with open(flag_file, encoding='utf-8') as f:
                contents = f.read()
            if 'EXCEEDING CYCLE/INSTR LIMIT' not in contents and \
                    'HIT GOOD TRAP' not in contents:
                print('Skip unfinished job:', workload)
                return

            print('Process finished job:', workload)

            d = u.xs_get_stats(path, cf.targets)
            if len(d):
                # add bmk and point after top-down processing
                segments = workload.split('_')
                d['point'] = segments[-1]
                d['workload'] = '_'.join(segments[:-1])
                d['bmk'] = segments[0]

            global_dict[workload] = d

    jobs = [Process(target=extract_and_post_process,
                    args=(all_bmk_dict, workload, path))
            for workload, path in paths]
    _ = [p.start() for p in jobs]
    _ = [p.join() for p in jobs]

    df = pd.DataFrame.from_dict(all_bmk_dict, orient='index')
    df = df.sort_index()
    df = df.reindex(sorted(df.columns), axis=1)
    df = df.fillna(0)
    df.to_csv(cf.CSV_PATH, index=True)


def proc_input(wl_df: pd.DataFrame, js: dict, workload: str):
    # We implement the weighted metrics computation with the following formula:
    #   weight_metrics = vec_weight matmul matrix_perf
    #   (1, N) = (1, W) matmul (W, N)
    # To make sure matrix_perf is in the same order as vec_weight,
    # we sort matrix_perf by point.
    assert isinstance(wl_df.iloc[0]['point'], np.int64)
    wl_df = wl_df.sort_values(by=['point'])
    # We also sort vec_weight by point
    wl_js = dict(js[workload])
    wl_df['cpi'] = 1.0 / wl_df['ipc']
    vec_weight = pd.DataFrame.from_dict(wl_js['points'], orient='index')

    # convert the string index into int64
    vec_weight.index = vec_weight.index.astype(np.int64)
    # select only existing points
    vec_weight = vec_weight.loc[wl_df['point']]
    vec_weight.columns = ['weight']
    vec_weight['weight'] = vec_weight['weight'].astype(np.float64)
    # normalize the weights so that their sum equals 1.0
    coverage = np.sum(vec_weight.values)
    vec_weight = vec_weight / coverage

    # drop the auxiliary fields
    to_drop = {'bmk', 'point', 'workload', 'ipc'}
    to_drop = to_drop.intersection(set(wl_df.columns.to_list()))
    wl_df = wl_df.drop(to_drop, axis=1)

    weight_metrics = np.matmul(vec_weight.values.reshape(1, -1), wl_df.values)
    weight_metrics_df = pd.DataFrame(weight_metrics, columns=wl_df.columns)
    # Coverage must be attached here, after the weighting, to avoid applying
    # a weight on top of a weight.
    weight_metrics_df['coverage'] = coverage
    return weight_metrics_df.values, weight_metrics_df.columns
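
# Illustrative only (not called anywhere in the pipeline): a minimal,
# self-contained sketch of the weighting scheme used by proc_input(), with
# made-up numbers for two points (W = 2) and two metric columns (N = 2).
def _weighting_demo():
    vec_weight = np.array([[0.25, 0.75]])    # normalized point weights, (1, W)
    matrix_perf = np.array([[2.0, 10.0],     # metrics of the first point
                            [4.0, 20.0]])    # metrics of the second point, (W, N)
    # (1, N) = (1, W) matmul (W, N); each output column blends one metric
    # across points: [[0.25*2 + 0.75*4, 0.25*10 + 0.75*20]] == [[3.5, 17.5]]
    return np.matmul(vec_weight, matrix_perf)
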
def proc_bmk(bmk_df: pd.DataFrame, js: dict):
    # Similar to the per-input processing, we view the instruction count as
    # the weight and compute weighted metrics with matrix multiplication.
    workloads = bmk_df['workload'].unique()
    metric_list = []
    for wl in workloads:
        metrics, cols = proc_input(bmk_df[bmk_df['workload'] == wl], js, wl)
        metric_list.append(metrics)
    metrics = np.concatenate(metric_list, axis=0)
    metrics = pd.DataFrame(metrics, columns=cols)

    input_dict = {}
    for workload in workloads:
        input_dict[workload] = int(js[workload]['insts'])
    input_insts = pd.DataFrame.from_dict(
        input_dict, orient='index', columns=['insts'])
    # normalize the instruction counts so that their sum equals 1.0
    vec_weight = input_insts / np.sum(input_insts.values)
    weight_metric = np.matmul(vec_weight.values.reshape(1, -1), metrics.values)
    return weight_metric, metrics.columns


def compute_weighted_metrics():
    df = pd.read_csv(cf.CSV_PATH, index_col=0)
    bmks = df['bmk'].unique()
    with open(cf.JSON_FILE, 'r', encoding='utf-8') as f:
        js = json.load(f)
    weighted = {}
    for bmk in bmks:
        if cf.INT_ONLY and bmk not in cf.spec_bmks['06']['int']:
            continue
        if cf.FP_ONLY and bmk not in cf.spec_bmks['06']['float']:
            continue
        df_bmk = df[df['bmk'] == bmk]
        workloads = df_bmk['workload'].unique()
        n_wl = len(workloads)
        if n_wl == 1:
            metrics, cols = proc_input(df_bmk, js, workloads[0])
        else:
            metrics, cols = proc_bmk(df_bmk, js)
        weighted[bmk] = metrics[0]
    weighted_df = pd.DataFrame.from_dict(
        weighted, orient='index', columns=cols)
    if 'cpi' in weighted_df.columns:
        weighted_df = weighted_df.sort_values(by='cpi', ascending=False)
    else:
        weighted_df = weighted_df.sort_index()
    weighted_df.to_csv(cf.OUT_CSV)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='generate top-down results')
    parser.add_argument('-s', '--stat-dir', action='store', required=True,
                        help='stat output directory')
    # `required=True` combined with a default is contradictory (the default
    # would never be used), so the flag is optional and falls back to the
    # bundled weight file.
    parser.add_argument('-j', '--json', action='store',
                        default='resources/spec06_rv64gcb_o2_20m.json',
                        help='specify the weight json file')
    opt = parser.parse_args()
    cf.stats_dir = opt.stat_dir
    cf.JSON_FILE = opt.json
    if not osp.exists('results'):
        os.makedirs('results')
    # Raise the soft open-file limit (the parallel workers can hold many
    # files open), but never above the hard limit: unprivileged processes
    # may not raise the hard limit, and lowering it is irreversible.
    soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
    if soft < 8192:
        new_soft = 8192 if hard == resource.RLIM_INFINITY else min(8192, hard)
        resource.setrlimit(resource.RLIMIT_NOFILE, (new_soft, hard))

    batch()
    compute_weighted_metrics()
    draw()
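
# Expected layout of the weight json (inferred from the accesses
# js[workload]['points'] and js[workload]['insts'] above; the shipped
# resources/spec06_rv64gcb_o2_20m.json may carry additional fields):
#
#   {
#     "<workload>": {
#       "insts": "<total dynamic instruction count>",
#       "points": { "<point id>": "<weight>", ... }
#     },
#     ...
#   }
#
# Example invocation (the script file name is illustrative):
#   python3 top_down.py -s <stat-dir> -j resources/spec06_rv64gcb_o2_20m.json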