xref: /XiangShan/scripts/top-down/top_down.py (revision f24210142010f7e7a701ca25e9fc1aa15595843e)
1from multiprocessing import Process, Manager
2import threading
3import os.path as osp
4import os
5import resource
6import json
7import argparse
8import psutil
9import numpy as np
10import pandas as pd
11import utils as u
12import configs as cf
13from draw import draw
14
15
def batch():
    """Harvest stats from every finished simulation job and dump them to CSV.

    Globs cf.stats_dir for ``simulator_err.txt`` files, extracts the counters
    listed in cf.targets from each finished job in parallel (one process per
    workload, throttled to the CPU count), and writes the combined table to
    cf.CSV_PATH with one row per workload.
    """
    paths = u.glob_stats(cf.stats_dir, fname='simulator_err.txt')

    manager = Manager()
    all_bmk_dict = manager.dict()

    # Use a manager-backed semaphore so the limit is shared across the worker
    # *processes*. The original used threading.Semaphore, which gets copied
    # into each forked child and therefore never throttled anything.
    semaphore = manager.Semaphore(psutil.cpu_count())

    def extract_and_post_process(global_dict, workload, path):
        with semaphore:
            # Only harvest jobs that ran to completion.
            flag_file = osp.join(osp.dirname(path), 'simulator_out.txt')
            with open(flag_file, encoding='utf-8') as f:
                contents = f.read()
            if 'EXCEEDING CYCLE/INSTR LIMIT' not in contents and \
                    'HIT GOOD TRAP' not in contents:
                print('Skip unfinished job:', workload)
                return

            print('Process finished job:', workload)

            d = u.xs_get_stats(path, cf.targets)
            if len(d):
                # Tag each record with its benchmark/input/point so the
                # weighting stage can group rows later.
                segments = workload.split('_')
                if len(segments):
                    d['point'] = segments[-1]
                    d['workload'] = '_'.join(segments[:-1])
                    d['bmk'] = segments[0]

            global_dict[workload] = d

    jobs = [Process(target=extract_and_post_process,
                    args=(all_bmk_dict, workload, path))
            for workload, path in paths]
    for p in jobs:
        p.start()
    for p in jobs:
        p.join()

    df = pd.DataFrame.from_dict(all_bmk_dict, orient='index')
    df = df.sort_index()
    df = df.reindex(sorted(df.columns), axis=1)
    # Missing counters become 0 rather than NaN so the matmul in the
    # weighting stage stays well-defined.
    df = df.fillna(0)
    df.to_csv(cf.CSV_PATH, index=True)
61
62
def proc_input(wl_df: pd.DataFrame, js: dict, workload: str):
    """Compute SimPoint-weighted metrics for one workload.

    The weighted value of every metric is the inner product of the per-point
    weight vector and the per-point metric matrix:
        (1, N) = (1, W) matmul (W, N)
    Both operands are sorted by point id so their rows line up.

    Returns a (values, columns) pair where values has shape (1, N+1); the
    extra 'coverage' column holds the raw weight sum before normalization.
    """
    assert isinstance(wl_df.iloc[0]['point'], np.int64)
    ordered = wl_df.sort_values(by=['point'])
    ordered['cpi'] = 1.0 / ordered['ipc']

    # Per-point weights from the SimPoint json, aligned to the rows above.
    wl_js = dict(js[workload])
    weights = pd.DataFrame.from_dict(wl_js['points'], orient='index')
    weights.index = weights.index.astype(np.int64)  # string keys -> int64
    weights = weights.loc[ordered['point']]         # keep only present points
    weights.columns = ['weight']
    weights['weight'] = weights['weight'].astype(np.float64)

    # Normalize so the weights sum to 1.0; remember the raw coverage.
    coverage = np.sum(weights.values)
    weights = weights / coverage

    # Auxiliary bookkeeping columns must not be weighted.
    aux = {'bmk', 'point', 'workload', 'ipc'} & set(ordered.columns.to_list())
    ordered = ordered.drop(aux, axis=1)

    result = np.matmul(weights.values.reshape(1, -1), ordered.values)
    result_df = pd.DataFrame(result, columns=ordered.columns)
    # Attach coverage afterwards so it is not itself weight-scaled.
    result_df['coverage'] = coverage
    return result_df.values, result_df.columns
97
98
def proc_bmk(bmk_df: pd.DataFrame, js: dict):
    """Compute weighted metrics for one benchmark with several inputs.

    Each input (workload) is first reduced to its SimPoint-weighted metrics
    via proc_input(); the per-input rows are then combined using the inputs'
    dynamic instruction counts as weights.

    Returns a (values, columns) pair matching proc_input()'s layout.
    """
    workloads = bmk_df['workload'].unique()
    metric_list = []
    for wl in workloads:
        metrics, cols = proc_input(bmk_df[bmk_df['workload'] == wl], js, wl)
        metric_list.append(metrics)
    metrics = pd.DataFrame(np.concatenate(metric_list, axis=0), columns=cols)

    # Weight every input by its dynamic instruction count.
    # (The original guarded this loop with `workload.startswith(workload)`,
    # which is always true -- bmk_df is already filtered to one benchmark,
    # so every workload belongs here and no filter is needed.)
    input_insts = pd.DataFrame.from_dict(
        {wl: int(js[wl]['insts']) for wl in workloads},
        orient='index', columns=['insts'])
    # Normalize the instruction counts so the weights sum to 1.0.
    vec_weight = input_insts / np.sum(input_insts.values)
    weight_metric = np.matmul(vec_weight.values.reshape(1, -1), metrics.values)
    return weight_metric, metrics.columns
120
121
def compute_weighted_metrics():
    """Aggregate the per-point CSV into one weighted row per benchmark.

    Reads cf.CSV_PATH (produced by batch()) and the SimPoint weight json,
    reduces every benchmark to a single weighted metric vector, and writes
    the result to cf.OUT_CSV, sorted by CPI when that column exists.
    """
    df = pd.read_csv(cf.CSV_PATH, index_col=0)
    bmks = df['bmk'].unique()
    with open(cf.JSON_FILE, 'r', encoding='utf-8') as f:
        js = json.load(f)

    weighted = {}
    cols = None
    for bmk in bmks:
        # Honor the INT-only / FP-only switches from configs.
        if cf.INT_ONLY and bmk not in cf.spec_bmks['06']['int']:
            continue
        if cf.FP_ONLY and bmk not in cf.spec_bmks['06']['float']:
            continue
        df_bmk = df[df['bmk'] == bmk]
        workloads = df_bmk['workload'].unique()
        # A single-input benchmark needs no instruction-count weighting.
        if len(workloads) == 1:
            metrics, cols = proc_input(df_bmk, js, workloads[0])
        else:
            metrics, cols = proc_bmk(df_bmk, js)
        weighted[bmk] = metrics[0]

    if cols is None:
        # Every benchmark was filtered out; the original code hit a NameError
        # on `cols` here, so report and bail out cleanly instead.
        print('No benchmark selected; nothing to write')
        return

    weighted_df = pd.DataFrame.from_dict(
        weighted, orient='index', columns=cols)
    if 'cpi' in weighted_df.columns:
        weighted_df = weighted_df.sort_values(by='cpi', ascending=False)
    else:
        weighted_df = weighted_df.sort_index()
    weighted_df.to_csv(cf.OUT_CSV)
148
149
if __name__ == '__main__':
    parser = argparse.ArgumentParser(usage='generate top-down results')
    parser.add_argument('-s', '--stat-dir', action='store', required=True,
                        help='stat output directory')
    # NOTE: the flag was previously required=True *and* had a default, which
    # made the default dead code; keep the default and make it optional.
    parser.add_argument('-j', '--json', action='store',
                        default='resources/spec06_rv64gcb_o2_20m.json',
                        help='specify json file')
    opt = parser.parse_args()
    cf.stats_dir = opt.stat_dir
    cf.JSON_FILE = opt.json

    os.makedirs('results', exist_ok=True)

    # batch() opens one stats file per concurrent job; raise the open-file
    # soft limit if it is low, but never above the hard limit (raising the
    # hard limit needs privileges and would make setrlimit throw ValueError).
    soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
    if soft < 8192:
        resource.setrlimit(resource.RLIMIT_NOFILE, (min(8192, hard), hard))

    batch()
    compute_weighted_metrics()
    draw()
167