1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * turbostat -- show CPU frequency and C-state residency
4 * on modern Intel and AMD processors.
5 *
6 * Copyright (c) 2025 Intel Corporation.
7 * Len Brown <[email protected]>
8 */
9
10 #define _GNU_SOURCE
11 #include MSRHEADER
12
// copied from arch/x86/include/asm/cpu_device_id.h
// VFM = packed Vendor/Family/Model identifier:
//   bits  7:0  model, bits 15:8 family, bits 23:16 vendor, bits 31:24 reserved.
#define VFM_MODEL_BIT	0
#define VFM_FAMILY_BIT	8
#define VFM_VENDOR_BIT	16
#define VFM_RSVD_BIT	24

#define	VFM_MODEL_MASK	GENMASK(VFM_FAMILY_BIT - 1, VFM_MODEL_BIT)
#define	VFM_FAMILY_MASK	GENMASK(VFM_VENDOR_BIT - 1, VFM_FAMILY_BIT)
#define	VFM_VENDOR_MASK	GENMASK(VFM_RSVD_BIT - 1, VFM_VENDOR_BIT)

/* Extract the individual fields back out of a packed VFM value. */
#define VFM_MODEL(vfm)	(((vfm) & VFM_MODEL_MASK) >> VFM_MODEL_BIT)
#define VFM_FAMILY(vfm)	(((vfm) & VFM_FAMILY_MASK) >> VFM_FAMILY_BIT)
#define VFM_VENDOR(vfm)	(((vfm) & VFM_VENDOR_MASK) >> VFM_VENDOR_BIT)

/* Pack vendor/family/model into one VFM value. */
#define	VFM_MAKE(_vendor, _family, _model) (	\
	((_model) << VFM_MODEL_BIT) |		\
	((_family) << VFM_FAMILY_BIT) |		\
	((_vendor) << VFM_VENDOR_BIT)		\
)
// end copied section

/* CPUID leaf 0x1A reports hybrid core type in EAX bits 31:24. */
#define CPUID_LEAF_MODEL_ID		0x1A
#define CPUID_LEAF_MODEL_ID_CORE_TYPE_SHIFT	24

#define X86_VENDOR_INTEL	0
38
39 #include INTEL_FAMILY_HEADER
40 #include BUILD_BUG_HEADER
41 #include <stdarg.h>
42 #include <stdio.h>
43 #include <err.h>
44 #include <unistd.h>
45 #include <sys/types.h>
46 #include <sys/wait.h>
47 #include <sys/stat.h>
48 #include <sys/select.h>
49 #include <sys/resource.h>
50 #include <sys/mman.h>
51 #include <fcntl.h>
52 #include <signal.h>
53 #include <sys/time.h>
54 #include <stdlib.h>
55 #include <getopt.h>
56 #include <dirent.h>
57 #include <string.h>
58 #include <ctype.h>
59 #include <sched.h>
60 #include <time.h>
61 #include <cpuid.h>
62 #include <sys/capability.h>
63 #include <errno.h>
64 #include <math.h>
65 #include <linux/perf_event.h>
66 #include <asm/unistd.h>
67 #include <stdbool.h>
68 #include <assert.h>
69 #include <linux/kernel.h>
70
71 #define UNUSED(x) (void)(x)
72
73 /*
74 * This list matches the column headers, except
75 * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
76 * 2. Core and CPU are moved to the end, we can't have strings that contain them
77 * matching on them for --show and --hide.
78 */
79
80 /*
81 * buffer size used by sscanf() for added column names
82 * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
83 */
#define NAME_BYTES 20		/* column/counter name buffer */
#define PATH_BYTES 128		/* sysfs path buffer */
#define PERF_NAME_BYTES 128	/* display name for a perf counter */

#define MAX_NOFILE 0x8000	/* RLIMIT_NOFILE target: many per-CPU fds may be open */

/* "perf/<device>/<event>" syntax used by --add to name a perf counter */
#define COUNTER_KIND_PERF_PREFIX "perf/"
#define COUNTER_KIND_PERF_PREFIX_LEN strlen(COUNTER_KIND_PERF_PREFIX)
#define PERF_DEV_NAME_BYTES 32
#define PERF_EVT_NAME_BYTES 32

/* CPUID.1A core-type values on hybrid parts */
#define INTEL_ECORE_TYPE	0x20
#define INTEL_PCORE_TYPE	0x40

/* NOTE: assumes 4 KiB pages (hard-coded 0x1000) */
#define ROUND_UP_TO_PAGE_SIZE(n) (((n) + 0x1000UL-1UL) & ~(0x1000UL-1UL))
99
/* Topology scope at which a counter is collected. */
enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
/* How a counter's raw value is interpreted when deriving the displayed value. */
enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC, COUNTER_K2M };
/* How a counter is rendered in the output columns. */
enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT, FORMAT_AVERAGE };
/* Where a counter's value is read from at run time. */
enum counter_source { COUNTER_SOURCE_NONE, COUNTER_SOURCE_PERF, COUNTER_SOURCE_MSR };
104
/*
 * One user-added perf counter (singly-linked list node).
 * Holds both how to open the event and how to format its output.
 */
struct perf_counter_info {
	struct perf_counter_info *next;

	/* How to open the counter / What counter it is. */
	char device[PERF_DEV_NAME_BYTES];
	char event[PERF_EVT_NAME_BYTES];

	/* How to show/format the counter. */
	char name[PERF_NAME_BYTES];
	unsigned int width;
	enum counter_scope scope;
	enum counter_type type;
	enum counter_format format;
	double scale;

	/* For reading the counter. */
	int *fd_perf_per_domain;	/* one perf fd per domain (CPU/core/package) */
	size_t num_domains;
};
124
/* One sysfs file backing a counter; 'id' is the owning CPU/domain index. */
struct sysfs_path {
	char path[PATH_BYTES];
	int id;
	struct sysfs_path *next;
};
130
/*
 * One built-in or user-added MSR/sysfs counter (singly-linked list node).
 * Also used as the element type of the bic[] name table below.
 */
struct msr_counter {
	unsigned int msr_num;
	char name[NAME_BYTES];
	struct sysfs_path *sp;
	unsigned int width;
	enum counter_type type;
	enum counter_format format;
	struct msr_counter *next;
	unsigned int flags;
#define	FLAGS_HIDE	(1 << 0)
#define	FLAGS_SHOW	(1 << 1)
	/* NOTE(review): SYSFS_PERCPU shares bit 1 with FLAGS_SHOW —
	 * presumably intentional (used in disjoint contexts); confirm. */
#define	SYSFS_PERCPU	(1 << 1)
};
144
/*
 * Built-in counter (BIC) name table, indexed by BIC bit number:
 * bic[N].name is the column controlled by (1ULL << N) — see the
 * BIC_* defines below. Order here MUST match those defines.
 */
struct msr_counter bic[] = {
	{ 0x0, "usec", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Time_Of_Day_Seconds", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Package", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Node", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Avg_MHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Busy%", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Bzy_MHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "TSC_MHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "IRQ", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "SMI", NULL, 32, 0, FORMAT_DELTA, NULL, 0 },
	{ 0x0, "sysfs", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%c1", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%c3", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%c6", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%c7", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "ThreadC", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CoreTmp", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CoreCnt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "PkgTmp", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "GFX%rc6", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "GFXMHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc2", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc3", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc6", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc7", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc8", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg%pc9", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pk%pc10", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%LPI", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "SYS%LPI", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "PkgWatt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CorWatt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "GFXWatt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "PkgCnt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "RAMWatt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "PKG_%", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "RAM_%", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Pkg_J", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Cor_J", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "GFX_J", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "RAM_J", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Mod%c6", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Totl%C0", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Any%C0", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "GFX%C0", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPUGFX%", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Core", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "APIC", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "X2APIC", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Die", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "GFXAMHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "IPC", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CoreThr", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "UncMHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "SAM%mc6", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "SAMMHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "SAMAMHz", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Die%c6", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "SysWatt", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 },
	{ 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 },
};
210
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
/*
 * One bit per built-in counter; bit N selects the column named by bic[N].
 * These masks populate bic_enabled / bic_present below.
 */
#define	BIC_USEC	(1ULL << 0)
#define	BIC_TOD		(1ULL << 1)
#define	BIC_Package	(1ULL << 2)
#define	BIC_Node	(1ULL << 3)
#define	BIC_Avg_MHz	(1ULL << 4)
#define	BIC_Busy	(1ULL << 5)
#define	BIC_Bzy_MHz	(1ULL << 6)
#define	BIC_TSC_MHz	(1ULL << 7)
#define	BIC_IRQ		(1ULL << 8)
#define	BIC_SMI		(1ULL << 9)
#define	BIC_sysfs	(1ULL << 10)
#define	BIC_CPU_c1	(1ULL << 11)
#define	BIC_CPU_c3	(1ULL << 12)
#define	BIC_CPU_c6	(1ULL << 13)
#define	BIC_CPU_c7	(1ULL << 14)
#define	BIC_ThreadC	(1ULL << 15)
#define	BIC_CoreTmp	(1ULL << 16)
#define	BIC_CoreCnt	(1ULL << 17)
#define	BIC_PkgTmp	(1ULL << 18)
#define	BIC_GFX_rc6	(1ULL << 19)
#define	BIC_GFXMHz	(1ULL << 20)
#define	BIC_Pkgpc2	(1ULL << 21)
#define	BIC_Pkgpc3	(1ULL << 22)
#define	BIC_Pkgpc6	(1ULL << 23)
#define	BIC_Pkgpc7	(1ULL << 24)
#define	BIC_Pkgpc8	(1ULL << 25)
#define	BIC_Pkgpc9	(1ULL << 26)
#define	BIC_Pkgpc10	(1ULL << 27)
#define	BIC_CPU_LPI	(1ULL << 28)
#define	BIC_SYS_LPI	(1ULL << 29)
#define	BIC_PkgWatt	(1ULL << 30)
#define	BIC_CorWatt	(1ULL << 31)
#define	BIC_GFXWatt	(1ULL << 32)
#define	BIC_PkgCnt	(1ULL << 33)
#define	BIC_RAMWatt	(1ULL << 34)
#define	BIC_PKG__	(1ULL << 35)
#define	BIC_RAM__	(1ULL << 36)
#define	BIC_Pkg_J	(1ULL << 37)
#define	BIC_Cor_J	(1ULL << 38)
#define	BIC_GFX_J	(1ULL << 39)
#define	BIC_RAM_J	(1ULL << 40)
#define	BIC_Mod_c6	(1ULL << 41)
#define	BIC_Totl_c0	(1ULL << 42)
#define	BIC_Any_c0	(1ULL << 43)
#define	BIC_GFX_c0	(1ULL << 44)
#define	BIC_CPUGFX	(1ULL << 45)
#define	BIC_Core	(1ULL << 46)
#define	BIC_CPU		(1ULL << 47)
#define	BIC_APIC	(1ULL << 48)
#define	BIC_X2APIC	(1ULL << 49)
#define	BIC_Die		(1ULL << 50)
#define	BIC_GFXACTMHz	(1ULL << 51)
#define	BIC_IPC		(1ULL << 52)
#define	BIC_CORE_THROT_CNT	(1ULL << 53)
#define	BIC_UNCORE_MHZ	(1ULL << 54)
#define	BIC_SAM_mc6	(1ULL << 55)
#define	BIC_SAMMHz	(1ULL << 56)
#define	BIC_SAMACTMHz	(1ULL << 57)
#define	BIC_Diec6	(1ULL << 58)
#define	BIC_SysWatt	(1ULL << 59)
#define	BIC_Sys_J	(1ULL << 60)
#define	BIC_NMI		(1ULL << 61)
#define	BIC_CPU_c1e	(1ULL << 62)

/* Category masks for --show/--hide group keywords. */
#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die)
#define BIC_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt)
#define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
#define BIC_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6)
#define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)

#define BIC_DISABLED_BY_DEFAULT	(BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)

/* bic_enabled: what the user wants shown; bic_present: what this system supports. */
unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;

/* A counter is displayed only if it is both enabled and present. */
#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME)
#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT)
293
/*
 * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
 * If you change the values, note they are used both in comparisons
 * (>= PCL__7) and to index pkg_cstate_limit_strings[].
 * PCL__N means the deepest allowed package C-state is PCn.
 */
#define PCLUKN 0		/* Unknown */
#define PCLRSV 1		/* Reserved */
#define PCL__0 2		/* PC0 */
#define PCL__1 3		/* PC1 */
#define PCL__2 4		/* PC2 */
#define PCL__3 5		/* PC3 */
#define PCL__4 6		/* PC4 */
#define PCL__6 7		/* PC6 */
#define PCL_6N 8		/* PC6 No Retention */
#define PCL_6R 9		/* PC6 Retention */
#define PCL__7 10		/* PC7 */
#define PCL_7S 11		/* PC7 Shrink */
#define PCL__8 12		/* PC8 */
#define PCL__9 13		/* PC9 */
#define PCL_10 14		/* PC10 */
#define PCLUNL 15		/* Unlimited */
315
struct amperf_group_fd;

char *proc_stat = "/proc/stat";
FILE *outf;			/* all output goes here (stdout or --out file) */
int *fd_percpu;			/* per-CPU /dev/cpu/N/msr fds */
int *fd_instr_count_percpu;	/* per-CPU perf fds for instruction counting (IPC) */
struct timeval interval_tv = { 5, 0 };	/* default measurement interval: 5 sec */
struct timespec interval_ts = { 5, 0 };

unsigned int num_iterations;
unsigned int header_iterations;
unsigned int debug;
unsigned int quiet;
unsigned int shown;
unsigned int sums_need_wide_columns;
unsigned int rapl_joules;
unsigned int summary_only;
unsigned int list_header_only;
unsigned int dump_only;
unsigned int force_load;
unsigned int has_aperf;
unsigned int has_aperf_access;
unsigned int has_epb;
unsigned int has_turbo;
unsigned int is_hybrid;
unsigned int units = 1000000;	/* MHz etc */
unsigned int genuine_intel;
unsigned int authentic_amd;
unsigned int hygon_genuine;
unsigned int max_level, max_extended_level;	/* max supported CPUID leaves */
unsigned int has_invariant_tsc;
unsigned int aperf_mperf_multiplier = 1;
double bclk;			/* base clock, MHz */
double base_hz;
unsigned int has_base_hz;
double tsc_tweak = 1.0;		/* base_hz/tsc_hz correction for Busy% */
unsigned int show_pkg_only;
unsigned int show_core_only;
char *output_buffer, *outp;	/* buffered output and its write cursor */
unsigned int do_dts;
unsigned int do_ptm;
unsigned int do_ipc;
unsigned long long cpuidle_cur_cpu_lpi_us;
unsigned long long cpuidle_cur_sys_lpi_us;
unsigned int tj_max;
unsigned int tj_max_override;
double rapl_power_units, rapl_time_units;
double rapl_dram_energy_units, rapl_energy_units, rapl_psys_energy_units;
double rapl_joule_counter_range;
unsigned int crystal_hz;
unsigned long long tsc_hz;
int base_cpu;			/* CPU used for system-wide MSR reads */
unsigned int has_hwp;		/* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
			/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
unsigned int has_hwp_notify;	/* IA32_HWP_INTERRUPT */
unsigned int has_hwp_activity_window;	/* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_epp;	/* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg;	/* IA32_HWP_REQUEST_PKG */
unsigned int first_counter_read = 1;	/* cleared after the first snapshot */

static struct timeval procsysfs_tv_begin;

int ignore_stdin;
bool no_msr;			/* run without /dev/cpu/N/msr access */
bool no_perf;			/* run without perf_event_open() */
381
/* Indices into gfx_info[] for graphics/SA-media sysfs counters. */
enum gfx_sysfs_idx {
	GFX_rc6,
	GFX_MHz,
	GFX_ACTMHz,
	SAM_mc6,
	SAM_MHz,
	SAM_ACTMHz,
	GFX_MAX
};

/* One graphics sysfs counter: kept-open FILE plus last value read. */
struct gfx_sysfs_info {
	FILE *fp;
	unsigned int val;
	unsigned long long val_ull;
};

static struct gfx_sysfs_info gfx_info[GFX_MAX];
399
/* Read MSR 'offset' on 'cpu' into *msr; returns 0 on success. */
int get_msr(int cpu, off_t offset, unsigned long long *msr);
/* Register an added counter (MSR- or sysfs-backed); returns 0 on success. */
int add_counter(unsigned int msr_num, char *path, char *name,
		unsigned int width, enum counter_scope scope,
		enum counter_type type, enum counter_format format, int flags, int package_num);
404
/* Model specific support Start */

/* List of features that may diverge among different platforms */
struct platform_features {
	bool has_msr_misc_feature_control;	/* MSR_MISC_FEATURE_CONTROL */
	bool has_msr_misc_pwr_mgmt;	/* MSR_MISC_PWR_MGMT */
	bool has_nhm_msrs;	/* MSR_PLATFORM_INFO, MSR_IA32_TEMPERATURE_TARGET, MSR_SMI_COUNT, MSR_PKG_CST_CONFIG_CONTROL, MSR_IA32_POWER_CTL, TRL MSRs */
	bool has_config_tdp;	/* MSR_CONFIG_TDP_NOMINAL/LEVEL_1/LEVEL_2/CONTROL, MSR_TURBO_ACTIVATION_RATIO */
	int bclk_freq;		/* CPU base clock */
	int crystal_freq;	/* Crystal clock to use when not available from CPUID.15 */
	int supported_cstates;	/* Core cstates and Package cstates supported */
	int cst_limit;		/* MSR_PKG_CST_CONFIG_CONTROL */
	bool has_cst_auto_convension;	/* AUTOMATIC_CSTATE_CONVERSION bit in MSR_PKG_CST_CONFIG_CONTROL */
	bool has_irtl_msrs;	/* MSR_PKGC3/PKGC6/PKGC7/PKGC8/PKGC9/PKGC10_IRTL */
	bool has_msr_core_c1_res;	/* MSR_CORE_C1_RES */
	bool has_msr_module_c6_res_ms;	/* MSR_MODULE_C6_RES_MS */
	bool has_msr_c6_demotion_policy_config;	/* MSR_CC6_DEMOTION_POLICY_CONFIG/MSR_MC6_DEMOTION_POLICY_CONFIG */
	bool has_msr_atom_pkg_c6_residency;	/* MSR_ATOM_PKG_C6_RESIDENCY */
	bool has_msr_knl_core_c6_residency;	/* MSR_KNL_CORE_C6_RESIDENCY */
	bool has_ext_cst_msrs;	/* MSR_PKG_WEIGHTED_CORE_C0_RES/MSR_PKG_ANY_CORE_C0_RES/MSR_PKG_ANY_GFXE_C0_RES/MSR_PKG_BOTH_CORE_GFXE_C0_RES */
	bool has_cst_prewake_bit;	/* Cstate prewake bit in MSR_IA32_POWER_CTL */
	int trl_msrs;		/* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs */
	int plr_msrs;		/* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS */
	int rapl_msrs;		/* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */
	bool has_per_core_rapl;	/* Indicates cores energy collection is per-core, not per-package. AMD specific for now */
	bool has_rapl_divisor;	/* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */
	bool has_fixed_rapl_unit;	/* Fixed Energy Unit used for DRAM RAPL Domain */
	bool has_fixed_rapl_psys_unit;	/* Fixed Energy Unit used for PSYS RAPL Domain */
	int rapl_quirk_tdp;	/* Hardcoded TDP value when cannot be retrieved from hardware */
	int tcc_offset_bits;	/* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */
	bool enable_tsc_tweak;	/* Use CPU Base freq instead of TSC freq for aperf/mperf counter */
	bool need_perf_multiplier;	/* mperf/aperf multiplier */
};

/* Maps a packed vendor/family/model (VFM) id to its feature set. */
struct platform_data {
	unsigned int vfm;
	const struct platform_features *features;
};
443
/* For BCLK */
enum bclk_freq {
	BCLK_100MHZ = 1,
	BCLK_133MHZ,
	BCLK_SLV,		/* Silvermont: read from MSR_FSB_FREQ, see slm_bclk() */
};

#define SLM_BCLK_FREQS 5
/* MSR_FSB_FREQ encoding -> BCLK MHz on Silvermont-class parts */
double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 };
453
slm_bclk(void)454 double slm_bclk(void)
455 {
456 unsigned long long msr = 3;
457 unsigned int i;
458 double freq;
459
460 if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
461 fprintf(outf, "SLM BCLK: unknown\n");
462
463 i = msr & 0xf;
464 if (i >= SLM_BCLK_FREQS) {
465 fprintf(outf, "SLM BCLK[%d] invalid\n", i);
466 i = 3;
467 }
468 freq = slm_freq_table[i];
469
470 if (!quiet)
471 fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
472
473 return freq;
474 }
475
/* For Package cstate limit */
/* Per-platform-family interpretation of MSR_PKG_CST_CONFIG_CONTROL's limit field. */
enum package_cstate_limit {
	CST_LIMIT_NHM = 1,
	CST_LIMIT_SNB,
	CST_LIMIT_HSW,
	CST_LIMIT_SKX,
	CST_LIMIT_ICX,
	CST_LIMIT_SLV,
	CST_LIMIT_AMT,
	CST_LIMIT_KNL,
	CST_LIMIT_GMT,
};
488
/* For Turbo Ratio Limit MSRs */
/* Bit-flags for platform_features.trl_msrs: which TRL MSR variants exist. */
enum turbo_ratio_limit_msrs {
	TRL_BASE = BIT(0),
	TRL_LIMIT1 = BIT(1),
	TRL_LIMIT2 = BIT(2),
	TRL_ATOM = BIT(3),
	TRL_KNL = BIT(4),
	TRL_CORECOUNT = BIT(5),
};

/* For Perf Limit Reason MSRs */
/* Bit-flags for platform_features.plr_msrs. */
enum perf_limit_reason_msrs {
	PLR_CORE = BIT(0),
	PLR_GFX = BIT(1),
	PLR_RING = BIT(2),
};
505
/* For RAPL MSRs */
/* Bit-flags for platform_features.rapl_msrs: which RAPL MSRs are implemented. */
enum rapl_msrs {
	RAPL_PKG_POWER_LIMIT = BIT(0),	/* 0x610 MSR_PKG_POWER_LIMIT */
	RAPL_PKG_ENERGY_STATUS = BIT(1),	/* 0x611 MSR_PKG_ENERGY_STATUS */
	RAPL_PKG_PERF_STATUS = BIT(2),	/* 0x613 MSR_PKG_PERF_STATUS */
	RAPL_PKG_POWER_INFO = BIT(3),	/* 0x614 MSR_PKG_POWER_INFO */
	RAPL_DRAM_POWER_LIMIT = BIT(4),	/* 0x618 MSR_DRAM_POWER_LIMIT */
	RAPL_DRAM_ENERGY_STATUS = BIT(5),	/* 0x619 MSR_DRAM_ENERGY_STATUS */
	RAPL_DRAM_PERF_STATUS = BIT(6),	/* 0x61b MSR_DRAM_PERF_STATUS */
	RAPL_DRAM_POWER_INFO = BIT(7),	/* 0x61c MSR_DRAM_POWER_INFO */
	RAPL_CORE_POWER_LIMIT = BIT(8),	/* 0x638 MSR_PP0_POWER_LIMIT */
	RAPL_CORE_ENERGY_STATUS = BIT(9),	/* 0x639 MSR_PP0_ENERGY_STATUS */
	RAPL_CORE_POLICY = BIT(10),	/* 0x63a MSR_PP0_POLICY */
	RAPL_GFX_POWER_LIMIT = BIT(11),	/* 0x640 MSR_PP1_POWER_LIMIT */
	RAPL_GFX_ENERGY_STATUS = BIT(12),	/* 0x641 MSR_PP1_ENERGY_STATUS */
	RAPL_GFX_POLICY = BIT(13),	/* 0x642 MSR_PP1_POLICY */
	RAPL_AMD_PWR_UNIT = BIT(14),	/* 0xc0010299 MSR_AMD_RAPL_POWER_UNIT */
	RAPL_AMD_CORE_ENERGY_STAT = BIT(15),	/* 0xc001029a MSR_AMD_CORE_ENERGY_STATUS */
	RAPL_AMD_PKG_ENERGY_STAT = BIT(16),	/* 0xc001029b MSR_AMD_PKG_ENERGY_STATUS */
	RAPL_PLATFORM_ENERGY_LIMIT = BIT(17),	/* 0x64c MSR_PLATFORM_ENERGY_LIMIT */
	RAPL_PLATFORM_ENERGY_STATUS = BIT(18),	/* 0x64d MSR_PLATFORM_ENERGY_STATUS */
};
528
/* Convenience groupings of the rapl_msrs bits above, per RAPL domain. */
#define RAPL_PKG	(RAPL_PKG_ENERGY_STATUS | RAPL_PKG_POWER_LIMIT)
#define RAPL_DRAM	(RAPL_DRAM_ENERGY_STATUS | RAPL_DRAM_POWER_LIMIT)
#define RAPL_CORE	(RAPL_CORE_ENERGY_STATUS | RAPL_CORE_POWER_LIMIT)
#define RAPL_GFX	(RAPL_GFX_POWER_LIMIT | RAPL_GFX_ENERGY_STATUS)
#define RAPL_PSYS	(RAPL_PLATFORM_ENERGY_STATUS | RAPL_PLATFORM_ENERGY_LIMIT)

#define RAPL_PKG_ALL	(RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO)
#define RAPL_DRAM_ALL	(RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO)
#define RAPL_CORE_ALL	(RAPL_CORE | RAPL_CORE_POLICY)
/* Fixed typo: was RAPL_GFX_POLIGY, an undefined identifier (the enum above
 * defines RAPL_GFX_POLICY).  It only compiled because this macro is unused. */
#define RAPL_GFX_ALL	(RAPL_GFX | RAPL_GFX_POLICY)

#define RAPL_AMD_F17H	(RAPL_AMD_PWR_UNIT | RAPL_AMD_CORE_ENERGY_STAT | RAPL_AMD_PKG_ENERGY_STAT)
541
/* For Cstates */
/* Bit-flags for platform_features.supported_cstates: CCn = core, PCn = package. */
enum cstates {
	CC1 = BIT(0),
	CC3 = BIT(1),
	CC6 = BIT(2),
	CC7 = BIT(3),
	PC2 = BIT(4),
	PC3 = BIT(5),
	PC6 = BIT(6),
	PC7 = BIT(7),
	PC8 = BIT(8),
	PC9 = BIT(9),
	PC10 = BIT(10),
};
556
/* Nehalem-class client parts */
static const struct platform_features nhm_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_133MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
	.cst_limit = CST_LIMIT_NHM,
	.trl_msrs = TRL_BASE,
};

/* Nehalem-class server parts (no Turbo Ratio Limit MSRs) */
static const struct platform_features nhx_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_133MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
	.cst_limit = CST_LIMIT_NHM,
};
573
/* Sandy Bridge client */
static const struct platform_features snb_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_SNB,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

/* Sandy Bridge server (DRAM RAPL instead of GFX) */
static const struct platform_features snx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_SNB,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
};
597
/* Ivy Bridge client (adds config-TDP) */
static const struct platform_features ivb_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_SNB,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

/* Ivy Bridge server */
static const struct platform_features ivx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_SNB,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE | TRL_LIMIT1,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL,
};
622
/* Haswell client */
static const struct platform_features hsw_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

/* Haswell server */
static const struct platform_features hsx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE | TRL_LIMIT1 | TRL_LIMIT2,
	.plr_msrs = PLR_CORE | PLR_RING,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
	.has_fixed_rapl_unit = 1,
};

/* Haswell low-power (ULT): adds deep package C-states PC8-PC10 */
static const struct platform_features hswl_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

/* Haswell-G */
static const struct platform_features hswg_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.plr_msrs = PLR_CORE | PLR_GFX | PLR_RING,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};
679
/* Broadwell client */
static const struct platform_features bdw_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

/* Broadwell-G */
static const struct platform_features bdwg_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO,
};

/* Broadwell server/DE */
static const struct platform_features bdx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC3 | CC6 | PC2 | PC3 | PC6,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.has_cst_auto_convension = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
	.has_fixed_rapl_unit = 1,
};
720
/* Skylake client (adds PSYS RAPL, TCC offset, TSC tweak) */
static const struct platform_features skl_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.crystal_freq = 24000000,
	.supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.has_ext_cst_msrs = 1,
	.trl_msrs = TRL_BASE,
	.tcc_offset_bits = 6,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS,
	.enable_tsc_tweak = 1,
};

/* Cannon Lake (drops CC3, adds MSR_CORE_C1_RES) */
static const struct platform_features cnl_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
	.cst_limit = CST_LIMIT_HSW,
	.has_irtl_msrs = 1,
	.has_msr_core_c1_res = 1,
	.has_ext_cst_msrs = 1,
	.trl_msrs = TRL_BASE,
	.tcc_offset_bits = 6,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS,
	.enable_tsc_tweak = 1,
};
754
/* Copied from cnl_features, with PC7/PC9 removed */
/* Alder Lake and similar hybrid clients */
static const struct platform_features adl_features = {
	.has_msr_misc_feature_control = cnl_features.has_msr_misc_feature_control,
	.has_msr_misc_pwr_mgmt = cnl_features.has_msr_misc_pwr_mgmt,
	.has_nhm_msrs = cnl_features.has_nhm_msrs,
	.has_config_tdp = cnl_features.has_config_tdp,
	.bclk_freq = cnl_features.bclk_freq,
	.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10,
	.cst_limit = cnl_features.cst_limit,
	.has_irtl_msrs = cnl_features.has_irtl_msrs,
	.has_msr_core_c1_res = cnl_features.has_msr_core_c1_res,
	.has_ext_cst_msrs = cnl_features.has_ext_cst_msrs,
	.trl_msrs = cnl_features.trl_msrs,
	.tcc_offset_bits = cnl_features.tcc_offset_bits,
	.rapl_msrs = cnl_features.rapl_msrs,
	.enable_tsc_tweak = cnl_features.enable_tsc_tweak,
};

/* Copied from adl_features, with PC3/PC8 removed */
/* Lunar Lake */
static const struct platform_features lnl_features = {
	.has_msr_misc_feature_control = adl_features.has_msr_misc_feature_control,
	.has_msr_misc_pwr_mgmt = adl_features.has_msr_misc_pwr_mgmt,
	.has_nhm_msrs = adl_features.has_nhm_msrs,
	.has_config_tdp = adl_features.has_config_tdp,
	.bclk_freq = adl_features.bclk_freq,
	.supported_cstates = CC1 | CC6 | CC7 | PC2 | PC6 | PC10,
	.cst_limit = adl_features.cst_limit,
	.has_irtl_msrs = adl_features.has_irtl_msrs,
	.has_msr_core_c1_res = adl_features.has_msr_core_c1_res,
	.has_ext_cst_msrs = adl_features.has_ext_cst_msrs,
	.trl_msrs = adl_features.trl_msrs,
	.tcc_offset_bits = adl_features.tcc_offset_bits,
	.rapl_msrs = adl_features.rapl_msrs,
	.enable_tsc_tweak = adl_features.enable_tsc_tweak,
};
790
/* Skylake server */
static const struct platform_features skx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_SKX,
	.has_irtl_msrs = 1,
	.has_cst_auto_convension = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
	.has_fixed_rapl_unit = 1,
};

/* Ice Lake server */
static const struct platform_features icx_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_ICX,
	.has_msr_core_c1_res = 1,
	.has_irtl_msrs = 1,
	.has_cst_prewake_bit = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
	.has_fixed_rapl_unit = 1,
};

/* Sapphire Rapids server */
static const struct platform_features spr_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_SKX,
	.has_msr_core_c1_res = 1,
	.has_irtl_msrs = 1,
	.has_cst_prewake_bit = 1,
	.has_fixed_rapl_psys_unit = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
};
837
/* Sierra Forest (E-core server; adds module C6 residency) */
static const struct platform_features srf_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6 | PC2 | PC6,
	.cst_limit = CST_LIMIT_SKX,
	.has_msr_core_c1_res = 1,
	.has_msr_module_c6_res_ms = 1,
	.has_irtl_msrs = 1,
	.has_cst_prewake_bit = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
};

/* Grand Ridge (no package C-states) */
static const struct platform_features grr_features = {
	.has_msr_misc_feature_control = 1,
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.has_config_tdp = 1,
	.bclk_freq = BCLK_100MHZ,
	.supported_cstates = CC1 | CC6,
	.cst_limit = CST_LIMIT_SKX,
	.has_msr_core_c1_res = 1,
	.has_msr_module_c6_res_ms = 1,
	.has_irtl_msrs = 1,
	.has_cst_prewake_bit = 1,
	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
};
869
/* Silvermont Atom (variable BCLK, RAPL divisor, 30 W TDP quirk) */
static const struct platform_features slv_features = {
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_SLV,
	.supported_cstates = CC1 | CC6 | PC6,
	.cst_limit = CST_LIMIT_SLV,
	.has_msr_core_c1_res = 1,
	.has_msr_module_c6_res_ms = 1,
	.has_msr_c6_demotion_policy_config = 1,
	.has_msr_atom_pkg_c6_residency = 1,
	.trl_msrs = TRL_ATOM,
	.rapl_msrs = RAPL_PKG | RAPL_CORE,
	.has_rapl_divisor = 1,
	.rapl_quirk_tdp = 30,
};

/* Silvermont micro-server (Avoton-class) */
static const struct platform_features slvd_features = {
	.has_msr_misc_pwr_mgmt = 1,
	.has_nhm_msrs = 1,
	.bclk_freq = BCLK_SLV,
	.supported_cstates = CC1 | CC6 | PC3 | PC6,
	.cst_limit = CST_LIMIT_SLV,
	.has_msr_atom_pkg_c6_residency = 1,
	.trl_msrs = TRL_BASE,
	.rapl_msrs = RAPL_PKG | RAPL_CORE,
	.rapl_quirk_tdp = 30,
};
896
897 static const struct platform_features amt_features = {
898 .has_nhm_msrs = 1,
899 .bclk_freq = BCLK_133MHZ,
900 .supported_cstates = CC1 | CC3 | CC6 | PC3 | PC6,
901 .cst_limit = CST_LIMIT_AMT,
902 .trl_msrs = TRL_BASE,
903 };
904
905 static const struct platform_features gmt_features = {
906 .has_msr_misc_pwr_mgmt = 1,
907 .has_nhm_msrs = 1,
908 .bclk_freq = BCLK_100MHZ,
909 .crystal_freq = 19200000,
910 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
911 .cst_limit = CST_LIMIT_GMT,
912 .has_irtl_msrs = 1,
913 .trl_msrs = TRL_BASE | TRL_CORECOUNT,
914 .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
915 };
916
917 static const struct platform_features gmtd_features = {
918 .has_msr_misc_pwr_mgmt = 1,
919 .has_nhm_msrs = 1,
920 .bclk_freq = BCLK_100MHZ,
921 .crystal_freq = 25000000,
922 .supported_cstates = CC1 | CC6 | PC2 | PC6,
923 .cst_limit = CST_LIMIT_GMT,
924 .has_irtl_msrs = 1,
925 .has_msr_core_c1_res = 1,
926 .trl_msrs = TRL_BASE | TRL_CORECOUNT,
927 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS,
928 };
929
930 static const struct platform_features gmtp_features = {
931 .has_msr_misc_pwr_mgmt = 1,
932 .has_nhm_msrs = 1,
933 .bclk_freq = BCLK_100MHZ,
934 .crystal_freq = 19200000,
935 .supported_cstates = CC1 | CC3 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
936 .cst_limit = CST_LIMIT_GMT,
937 .has_irtl_msrs = 1,
938 .trl_msrs = TRL_BASE,
939 .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO,
940 };
941
942 static const struct platform_features tmt_features = {
943 .has_msr_misc_pwr_mgmt = 1,
944 .has_nhm_msrs = 1,
945 .bclk_freq = BCLK_100MHZ,
946 .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC7 | PC8 | PC9 | PC10,
947 .cst_limit = CST_LIMIT_GMT,
948 .has_irtl_msrs = 1,
949 .trl_msrs = TRL_BASE,
950 .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX,
951 .enable_tsc_tweak = 1,
952 };
953
954 static const struct platform_features tmtd_features = {
955 .has_msr_misc_pwr_mgmt = 1,
956 .has_nhm_msrs = 1,
957 .bclk_freq = BCLK_100MHZ,
958 .supported_cstates = CC1 | CC6,
959 .cst_limit = CST_LIMIT_GMT,
960 .has_irtl_msrs = 1,
961 .trl_msrs = TRL_BASE | TRL_CORECOUNT,
962 .rapl_msrs = RAPL_PKG_ALL,
963 };
964
965 static const struct platform_features knl_features = {
966 .has_msr_misc_pwr_mgmt = 1,
967 .has_nhm_msrs = 1,
968 .has_config_tdp = 1,
969 .bclk_freq = BCLK_100MHZ,
970 .supported_cstates = CC1 | CC6 | PC3 | PC6,
971 .cst_limit = CST_LIMIT_KNL,
972 .has_msr_knl_core_c6_residency = 1,
973 .trl_msrs = TRL_KNL,
974 .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL,
975 .has_fixed_rapl_unit = 1,
976 .need_perf_multiplier = 1,
977 };
978
979 static const struct platform_features default_features = {
980 };
981
982 static const struct platform_features amd_features_with_rapl = {
983 .rapl_msrs = RAPL_AMD_F17H,
984 .has_per_core_rapl = 1,
985 .rapl_quirk_tdp = 280, /* This is the max stock TDP of HEDT/Server Fam17h+ chips */
986 };
987
/*
 * Map each supported Intel VFM (vendor/family/model) to its feature set.
 * Scanned linearly by probe_platform_features(); terminated by a NULL entry.
 */
static const struct platform_data turbostat_pdata[] = {
	{ INTEL_NEHALEM, &nhm_features },
	{ INTEL_NEHALEM_G, &nhm_features },
	{ INTEL_NEHALEM_EP, &nhm_features },
	{ INTEL_NEHALEM_EX, &nhx_features },
	{ INTEL_WESTMERE, &nhm_features },
	{ INTEL_WESTMERE_EP, &nhm_features },
	{ INTEL_WESTMERE_EX, &nhx_features },
	{ INTEL_SANDYBRIDGE, &snb_features },
	{ INTEL_SANDYBRIDGE_X, &snx_features },
	{ INTEL_IVYBRIDGE, &ivb_features },
	{ INTEL_IVYBRIDGE_X, &ivx_features },
	{ INTEL_HASWELL, &hsw_features },
	{ INTEL_HASWELL_X, &hsx_features },
	{ INTEL_HASWELL_L, &hswl_features },
	{ INTEL_HASWELL_G, &hswg_features },
	{ INTEL_BROADWELL, &bdw_features },
	{ INTEL_BROADWELL_G, &bdwg_features },
	{ INTEL_BROADWELL_X, &bdx_features },
	{ INTEL_BROADWELL_D, &bdx_features },
	{ INTEL_SKYLAKE_L, &skl_features },
	{ INTEL_SKYLAKE, &skl_features },
	{ INTEL_SKYLAKE_X, &skx_features },
	{ INTEL_KABYLAKE_L, &skl_features },
	{ INTEL_KABYLAKE, &skl_features },
	{ INTEL_COMETLAKE, &skl_features },
	{ INTEL_COMETLAKE_L, &skl_features },
	{ INTEL_CANNONLAKE_L, &cnl_features },
	{ INTEL_ICELAKE_X, &icx_features },
	{ INTEL_ICELAKE_D, &icx_features },
	{ INTEL_ICELAKE_L, &cnl_features },
	{ INTEL_ICELAKE_NNPI, &cnl_features },
	{ INTEL_ROCKETLAKE, &cnl_features },
	{ INTEL_TIGERLAKE_L, &cnl_features },
	{ INTEL_TIGERLAKE, &cnl_features },
	{ INTEL_SAPPHIRERAPIDS_X, &spr_features },
	{ INTEL_EMERALDRAPIDS_X, &spr_features },
	{ INTEL_GRANITERAPIDS_X, &spr_features },
	{ INTEL_GRANITERAPIDS_D, &spr_features },
	{ INTEL_LAKEFIELD, &cnl_features },
	{ INTEL_ALDERLAKE, &adl_features },
	{ INTEL_ALDERLAKE_L, &adl_features },
	{ INTEL_RAPTORLAKE, &adl_features },
	{ INTEL_RAPTORLAKE_P, &adl_features },
	{ INTEL_RAPTORLAKE_S, &adl_features },
	{ INTEL_METEORLAKE, &adl_features },
	{ INTEL_METEORLAKE_L, &adl_features },
	{ INTEL_ARROWLAKE_H, &adl_features },
	{ INTEL_ARROWLAKE_U, &adl_features },
	{ INTEL_ARROWLAKE, &adl_features },
	{ INTEL_LUNARLAKE_M, &lnl_features },
	{ INTEL_PANTHERLAKE_L, &lnl_features },
	{ INTEL_ATOM_SILVERMONT, &slv_features },
	{ INTEL_ATOM_SILVERMONT_D, &slvd_features },
	{ INTEL_ATOM_AIRMONT, &amt_features },
	{ INTEL_ATOM_GOLDMONT, &gmt_features },
	{ INTEL_ATOM_GOLDMONT_D, &gmtd_features },
	{ INTEL_ATOM_GOLDMONT_PLUS, &gmtp_features },
	{ INTEL_ATOM_TREMONT_D, &tmtd_features },
	{ INTEL_ATOM_TREMONT, &tmt_features },
	{ INTEL_ATOM_TREMONT_L, &tmt_features },
	{ INTEL_ATOM_GRACEMONT, &adl_features },
	{ INTEL_ATOM_CRESTMONT_X, &srf_features },
	{ INTEL_ATOM_CRESTMONT, &grr_features },
	{ INTEL_ATOM_DARKMONT_X, &srf_features },
	{ INTEL_XEON_PHI_KNL, &knl_features },
	{ INTEL_XEON_PHI_KNM, &knl_features },
	/*
	 * Missing support for
	 * INTEL_ICELAKE
	 * INTEL_ATOM_SILVERMONT_MID
	 * INTEL_ATOM_AIRMONT_MID
	 * INTEL_ATOM_AIRMONT_NP
	 */
	{ 0, NULL },
};

/* Feature set of the host platform; set once by probe_platform_features() */
static const struct platform_features *platform;
1066
probe_platform_features(unsigned int family,unsigned int model)1067 void probe_platform_features(unsigned int family, unsigned int model)
1068 {
1069 int i;
1070
1071
1072 if (authentic_amd || hygon_genuine) {
1073 /* fallback to default features on unsupported models */
1074 force_load++;
1075 if (max_extended_level >= 0x80000007) {
1076 unsigned int eax, ebx, ecx, edx;
1077
1078 __cpuid(0x80000007, eax, ebx, ecx, edx);
1079 /* RAPL (Fam 17h+) */
1080 if ((edx & (1 << 14)) && family >= 0x17)
1081 platform = &amd_features_with_rapl;
1082 }
1083 goto end;
1084 }
1085
1086 if (!genuine_intel)
1087 goto end;
1088
1089 for (i = 0; turbostat_pdata[i].features; i++) {
1090 if (VFM_FAMILY(turbostat_pdata[i].vfm) == family && VFM_MODEL(turbostat_pdata[i].vfm) == model) {
1091 platform = turbostat_pdata[i].features;
1092 return;
1093 }
1094 }
1095
1096 end:
1097 if (force_load && !platform) {
1098 fprintf(outf, "Forced to run on unsupported platform!\n");
1099 platform = &default_features;
1100 }
1101
1102 if (platform)
1103 return;
1104
1105 fprintf(stderr, "Unsupported platform detected.\n"
1106 "\tSee RUN THE LATEST VERSION on turbostat(8)\n");
1107 exit(1);
1108 }
1109
1110 /* Model specific support End */
1111
/* Default TjMax when MSR_IA32_TEMPERATURE_TARGET is unavailable */
#define TJMAX_DEFAULT	100

/* MSRs that are not yet in the kernel-provided header. */
#define MSR_RAPL_PWR_UNIT	0xc0010299
#define MSR_CORE_ENERGY_STAT	0xc001029a
#define MSR_PKG_ENERGY_STAT	0xc001029b

/* NOTE(review): evaluates both arguments twice -- avoid side effects in a/b */
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Count of intervals where a counter went backwards */
int backwards_count;
char *progname;

#define CPU_SUBSET_MAXCPUS	1024	/* need to use before probe... */
cpu_set_t *cpu_present_set, *cpu_possible_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size;
#define MAX_ADDED_THREAD_COUNTERS 24
#define MAX_ADDED_CORE_COUNTERS 8
#define MAX_ADDED_PACKAGE_COUNTERS 16
#define PMT_MAX_ADDED_THREAD_COUNTERS 24
#define PMT_MAX_ADDED_CORE_COUNTERS 8
#define PMT_MAX_ADDED_PACKAGE_COUNTERS 16
#define BITMASK_SIZE 32

/* Zero a real array; __must_be_array() rejects pointers at compile time */
#define ZERO_ARRAY(arr) (memset(arr, 0, sizeof(arr)) + __must_be_array(arr))
1136
/* Indexes used to map data read from perf and MSRs into global variables */
enum rapl_rci_index {
	RAPL_RCI_INDEX_ENERGY_PKG = 0,
	RAPL_RCI_INDEX_ENERGY_CORES = 1,
	RAPL_RCI_INDEX_DRAM = 2,
	RAPL_RCI_INDEX_GFX = 3,
	RAPL_RCI_INDEX_PKG_PERF_STATUS = 4,
	RAPL_RCI_INDEX_DRAM_PERF_STATUS = 5,
	RAPL_RCI_INDEX_CORE_ENERGY = 6,
	RAPL_RCI_INDEX_ENERGY_PLATFORM = 7,
	NUM_RAPL_COUNTERS,
};

/* Unit a RAPL counter is reported in */
enum rapl_unit {
	RAPL_UNIT_INVALID,
	RAPL_UNIT_JOULES,
	RAPL_UNIT_WATTS,
};

/* Runtime state for all RAPL counters of one RAPL domain, indexed by rapl_rci_index */
struct rapl_counter_info_t {
	unsigned long long data[NUM_RAPL_COUNTERS];
	enum counter_source source[NUM_RAPL_COUNTERS];
	unsigned long long flags[NUM_RAPL_COUNTERS];
	double scale[NUM_RAPL_COUNTERS];
	enum rapl_unit unit[NUM_RAPL_COUNTERS];
	unsigned long long msr[NUM_RAPL_COUNTERS];
	unsigned long long msr_mask[NUM_RAPL_COUNTERS];
	int msr_shift[NUM_RAPL_COUNTERS];

	/* perf group fd for this domain; shared by all its perf-sourced counters */
	int fd_perf;
};

/* struct rapl_counter_info_t for each RAPL domain */
struct rapl_counter_info_t *rapl_counter_info_perdomain;
unsigned int rapl_counter_info_perdomain_size;

#define RAPL_COUNTER_FLAG_PLATFORM_COUNTER (1u << 0)
#define RAPL_COUNTER_FLAG_USE_MSR_SUM (1u << 1)

/* Static description of one RAPL counter source (perf event and/or MSR) */
struct rapl_counter_arch_info {
	int feature_mask;	/* Mask for testing if the counter is supported on host */
	const char *perf_subsys;
	const char *perf_name;
	unsigned long long msr;
	unsigned long long msr_mask;
	int msr_shift;		/* Positive mean shift right, negative mean shift left */
	double *platform_rapl_msr_scale;	/* Scale applied to values read by MSR (platform dependent, filled at runtime) */
	unsigned int rci_index;	/* Maps data from perf counters to global variables */
	unsigned long long bic;
	double compat_scale;	/* Some counters require constant scaling to be in the same range as other, similar ones */
	unsigned long long flags;
};
1189
/*
 * Every RAPL counter turbostat knows how to read: the perf event to prefer,
 * the MSR fallback (with mask/shift/scale), and the BIC bits it feeds.
 */
static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = {
	{
		.feature_mask = RAPL_PKG,
		.perf_subsys = "power",
		.perf_name = "energy-pkg",
		.msr = MSR_PKG_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
		.bic = BIC_PkgWatt | BIC_Pkg_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_AMD_F17H,
		.perf_subsys = "power",
		.perf_name = "energy-pkg",
		.msr = MSR_PKG_ENERGY_STAT,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
		.bic = BIC_PkgWatt | BIC_Pkg_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_CORE_ENERGY_STATUS,
		.perf_subsys = "power",
		.perf_name = "energy-cores",
		.msr = MSR_PP0_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_CORES,
		.bic = BIC_CorWatt | BIC_Cor_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_DRAM,
		.perf_subsys = "power",
		.perf_name = "energy-ram",
		.msr = MSR_DRAM_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_dram_energy_units,
		.rci_index = RAPL_RCI_INDEX_DRAM,
		.bic = BIC_RAMWatt | BIC_RAM_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_GFX,
		.perf_subsys = "power",
		.perf_name = "energy-gpu",
		.msr = MSR_PP1_ENERGY_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_GFX,
		.bic = BIC_GFXWatt | BIC_GFX_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		/* MSR only: no perf event exists for PKG perf (throttling) status */
		.feature_mask = RAPL_PKG_PERF_STATUS,
		.perf_subsys = NULL,
		.perf_name = NULL,
		.msr = MSR_PKG_PERF_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_time_units,
		.rci_index = RAPL_RCI_INDEX_PKG_PERF_STATUS,
		.bic = BIC_PKG__,
		.compat_scale = 100.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		.feature_mask = RAPL_DRAM_PERF_STATUS,
		.perf_subsys = NULL,
		.perf_name = NULL,
		.msr = MSR_DRAM_PERF_STATUS,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_time_units,
		.rci_index = RAPL_RCI_INDEX_DRAM_PERF_STATUS,
		.bic = BIC_RAM__,
		.compat_scale = 100.0,
		.flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
	{
		/* AMD per-core energy: 32-bit counter, MSR only */
		.feature_mask = RAPL_AMD_F17H,
		.perf_subsys = NULL,
		.perf_name = NULL,
		.msr = MSR_CORE_ENERGY_STAT,
		.msr_mask = 0xFFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_energy_units,
		.rci_index = RAPL_RCI_INDEX_CORE_ENERGY,
		.bic = BIC_CorWatt | BIC_Cor_J,
		.compat_scale = 1.0,
		.flags = 0,
	},
	{
		.feature_mask = RAPL_PSYS,
		.perf_subsys = "power",
		.perf_name = "energy-psys",
		.msr = MSR_PLATFORM_ENERGY_STATUS,
		.msr_mask = 0x00000000FFFFFFFF,
		.msr_shift = 0,
		.platform_rapl_msr_scale = &rapl_psys_energy_units,
		.rci_index = RAPL_RCI_INDEX_ENERGY_PLATFORM,
		.bic = BIC_SysWatt | BIC_Sys_J,
		.compat_scale = 1.0,
		.flags = RAPL_COUNTER_FLAG_PLATFORM_COUNTER | RAPL_COUNTER_FLAG_USE_MSR_SUM,
	},
};
1309
/* One sampled RAPL value plus how to convert it for display */
struct rapl_counter {
	unsigned long long raw_value;
	enum rapl_unit unit;
	double scale;
};

/* Indexes used to map data read from perf and MSRs into global variables */
enum ccstate_rci_index {
	/* Core C-state residencies */
	CCSTATE_RCI_INDEX_C1_RESIDENCY = 0,
	CCSTATE_RCI_INDEX_C3_RESIDENCY = 1,
	CCSTATE_RCI_INDEX_C6_RESIDENCY = 2,
	CCSTATE_RCI_INDEX_C7_RESIDENCY = 3,
	/* Package C-state residencies */
	PCSTATE_RCI_INDEX_C2_RESIDENCY = 4,
	PCSTATE_RCI_INDEX_C3_RESIDENCY = 5,
	PCSTATE_RCI_INDEX_C6_RESIDENCY = 6,
	PCSTATE_RCI_INDEX_C7_RESIDENCY = 7,
	PCSTATE_RCI_INDEX_C8_RESIDENCY = 8,
	PCSTATE_RCI_INDEX_C9_RESIDENCY = 9,
	PCSTATE_RCI_INDEX_C10_RESIDENCY = 10,
	NUM_CSTATE_COUNTERS,
};

/* Runtime C-state counter state, indexed by ccstate_rci_index */
struct cstate_counter_info_t {
	unsigned long long data[NUM_CSTATE_COUNTERS];
	enum counter_source source[NUM_CSTATE_COUNTERS];
	unsigned long long msr[NUM_CSTATE_COUNTERS];
	int fd_perf_core;	/* perf group fd for cstate_core events */
	int fd_perf_pkg;	/* perf group fd for cstate_pkg events */
};

struct cstate_counter_info_t *ccstate_counter_info;
unsigned int ccstate_counter_info_size;

#define CSTATE_COUNTER_FLAG_COLLECT_PER_CORE   (1u << 0)
/* PER_THREAD implies PER_CORE */
#define CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD ((1u << 1) | CSTATE_COUNTER_FLAG_COLLECT_PER_CORE)
#define CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY (1u << 2)

/* Static description of one C-state residency counter source */
struct cstate_counter_arch_info {
	int feature_mask;	/* Mask for testing if the counter is supported on host */
	const char *perf_subsys;
	const char *perf_name;
	unsigned long long msr;
	unsigned int rci_index;	/* Maps data from perf counters to global variables */
	unsigned long long bic;
	unsigned long long flags;
	int pkg_cstate_limit;
};
1357
/*
 * Every core/package C-state residency counter turbostat knows how to read:
 * preferred perf event, MSR fallback, collection scope and BIC bits.
 */
static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = {
	{
		.feature_mask = CC1,
		.perf_subsys = "cstate_core",
		.perf_name = "c1-residency",
		.msr = MSR_CORE_C1_RES,
		.rci_index = CCSTATE_RCI_INDEX_C1_RESIDENCY,
		.bic = BIC_CPU_c1,
		.flags = CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD,
		.pkg_cstate_limit = 0,
	},
	{
		.feature_mask = CC3,
		.perf_subsys = "cstate_core",
		.perf_name = "c3-residency",
		.msr = MSR_CORE_C3_RESIDENCY,
		.rci_index = CCSTATE_RCI_INDEX_C3_RESIDENCY,
		.bic = BIC_CPU_c3,
		.flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY,
		.pkg_cstate_limit = 0,
	},
	{
		.feature_mask = CC6,
		.perf_subsys = "cstate_core",
		.perf_name = "c6-residency",
		.msr = MSR_CORE_C6_RESIDENCY,
		.rci_index = CCSTATE_RCI_INDEX_C6_RESIDENCY,
		.bic = BIC_CPU_c6,
		.flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY,
		.pkg_cstate_limit = 0,
	},
	{
		.feature_mask = CC7,
		.perf_subsys = "cstate_core",
		.perf_name = "c7-residency",
		.msr = MSR_CORE_C7_RESIDENCY,
		.rci_index = CCSTATE_RCI_INDEX_C7_RESIDENCY,
		.bic = BIC_CPU_c7,
		.flags = CSTATE_COUNTER_FLAG_COLLECT_PER_CORE | CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY,
		.pkg_cstate_limit = 0,
	},
	{
		.feature_mask = PC2,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c2-residency",
		.msr = MSR_PKG_C2_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C2_RESIDENCY,
		.bic = BIC_Pkgpc2,
		.flags = 0,
		.pkg_cstate_limit = PCL__2,
	},
	{
		.feature_mask = PC3,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c3-residency",
		.msr = MSR_PKG_C3_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C3_RESIDENCY,
		.bic = BIC_Pkgpc3,
		.flags = 0,
		.pkg_cstate_limit = PCL__3,
	},
	{
		.feature_mask = PC6,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c6-residency",
		.msr = MSR_PKG_C6_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C6_RESIDENCY,
		.bic = BIC_Pkgpc6,
		.flags = 0,
		.pkg_cstate_limit = PCL__6,
	},
	{
		.feature_mask = PC7,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c7-residency",
		.msr = MSR_PKG_C7_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C7_RESIDENCY,
		.bic = BIC_Pkgpc7,
		.flags = 0,
		.pkg_cstate_limit = PCL__7,
	},
	{
		.feature_mask = PC8,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c8-residency",
		.msr = MSR_PKG_C8_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C8_RESIDENCY,
		.bic = BIC_Pkgpc8,
		.flags = 0,
		.pkg_cstate_limit = PCL__8,
	},
	{
		.feature_mask = PC9,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c9-residency",
		.msr = MSR_PKG_C9_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C9_RESIDENCY,
		.bic = BIC_Pkgpc9,
		.flags = 0,
		.pkg_cstate_limit = PCL__9,
	},
	{
		.feature_mask = PC10,
		.perf_subsys = "cstate_pkg",
		.perf_name = "c10-residency",
		.msr = MSR_PKG_C10_RESIDENCY,
		.rci_index = PCSTATE_RCI_INDEX_C10_RESIDENCY,
		.bic = BIC_Pkgpc10,
		.flags = 0,
		.pkg_cstate_limit = PCL_10,
	},
};
1470
/* Indexes used to map data read from perf and MSRs into global variables */
enum msr_rci_index {
	MSR_RCI_INDEX_APERF = 0,
	MSR_RCI_INDEX_MPERF = 1,
	MSR_RCI_INDEX_SMI = 2,
	NUM_MSR_COUNTERS,
};

/* Runtime state for the APERF/MPERF/SMI counters, indexed by msr_rci_index */
struct msr_counter_info_t {
	unsigned long long data[NUM_MSR_COUNTERS];
	enum counter_source source[NUM_MSR_COUNTERS];
	unsigned long long msr[NUM_MSR_COUNTERS];
	unsigned long long msr_mask[NUM_MSR_COUNTERS];
	int fd_perf;	/* perf group fd for "msr" subsystem events */
};

struct msr_counter_info_t *msr_counter_info;
unsigned int msr_counter_info_size;

/* Static description of one MSR-backed counter (perf "msr" event or raw MSR) */
struct msr_counter_arch_info {
	const char *perf_subsys;
	const char *perf_name;
	unsigned long long msr;
	unsigned long long msr_mask;
	unsigned int rci_index;	/* Maps data from perf counters to global variables */
	bool needed;
	bool present;
};

/* Indexes into msr_counter_arch_infos[] */
enum msr_arch_info_index {
	MSR_ARCH_INFO_APERF_INDEX = 0,
	MSR_ARCH_INFO_MPERF_INDEX = 1,
	MSR_ARCH_INFO_SMI_INDEX = 2,
};
1505
/* APERF, MPERF and SMI counter descriptions (perf "msr" subsystem or raw MSR) */
static struct msr_counter_arch_infos[] */
static struct msr_counter_arch_info msr_counter_arch_infos[] = {
	[MSR_ARCH_INFO_APERF_INDEX] = {
		.perf_subsys = "msr",
		.perf_name = "aperf",
		.msr = MSR_IA32_APERF,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.rci_index = MSR_RCI_INDEX_APERF,
	},

	[MSR_ARCH_INFO_MPERF_INDEX] = {
		.perf_subsys = "msr",
		.perf_name = "mperf",
		.msr = MSR_IA32_MPERF,
		.msr_mask = 0xFFFFFFFFFFFFFFFF,
		.rci_index = MSR_RCI_INDEX_MPERF,
	},

	[MSR_ARCH_INFO_SMI_INDEX] = {
		.perf_subsys = "msr",
		.perf_name = "smi",
		.msr = MSR_SMI_COUNT,
		/* SMI count is a 32-bit counter */
		.msr_mask = 0xFFFFFFFF,
		.rci_index = MSR_RCI_INDEX_SMI,
	},
};
1531
/* Can be redefined when compiling, useful for testing. */
#ifndef SYSFS_TELEM_PATH
#define SYSFS_TELEM_PATH "/sys/class/intel_pmt"
#endif

/* MTL package C6 residency PMT counter location (offset/bit span within GUID region) */
#define PMT_COUNTER_MTL_DC6_OFFSET 120
#define PMT_COUNTER_MTL_DC6_LSB    0
#define PMT_COUNTER_MTL_DC6_MSB    63
#define PMT_MTL_DC6_GUID           0x1a067102
#define PMT_MTL_DC6_SEQ            0

/* CWF module C1E residency PMT counters: per-module slots at a fixed stride */
#define PMT_COUNTER_CWF_MC1E_OFFSET_BASE          20936
#define PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT     24
#define PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE 12
#define PMT_COUNTER_CWF_CPUS_PER_MODULE           4
#define PMT_COUNTER_CWF_MC1E_LSB                  0
#define PMT_COUNTER_CWF_MC1E_MSB                  63
#define PMT_CWF_MC1E_GUID                         0x14421519

/* Clock used to convert PMT_TYPE_TCORE_CLOCK ticks to time */
unsigned long long tcore_clock_freq_hz = 800000000;

#define PMT_COUNTER_NAME_SIZE_BYTES      16
#define PMT_COUNTER_TYPE_NAME_SIZE_BYTES 32

/* One mmaped PMT telemetry region, kept in a singly linked list (pmt_mmios) */
struct pmt_mmio {
	struct pmt_mmio *next;

	unsigned int guid;
	unsigned int size;

	/* Base pointer to the mmaped memory. */
	void *mmio_base;

	/*
	 * Offset to be applied to the mmio_base
	 * to get the beginning of the PMT counters for given GUID.
	 */
	unsigned long pmt_offset;
} *pmt_mmios;

/* How to interpret a raw PMT counter value */
enum pmt_datatype {
	PMT_TYPE_RAW,
	PMT_TYPE_XTAL_TIME,
	PMT_TYPE_TCORE_CLOCK,
};

struct pmt_domain_info {
	/*
	 * Pointer to the MMIO obtained by applying a counter offset
	 * to the mmio_base of the mmaped region for the given GUID.
	 *
	 * This is where to read the raw value of the counter from.
	 */
	unsigned long *pcounter;
};

/* A user-visible PMT counter: metadata plus one pcounter per domain */
struct pmt_counter {
	struct pmt_counter *next;

	/* PMT metadata */
	char name[PMT_COUNTER_NAME_SIZE_BYTES];
	enum pmt_datatype type;
	enum counter_scope scope;
	unsigned int lsb;
	unsigned int msb;

	/* BIC-like metadata */
	enum counter_format format;

	unsigned int num_domains;
	struct pmt_domain_info *domains;
};

/*
 * PMT telemetry directory iterator.
 * Used to iterate telemetry files in sysfs in correct order.
 */
struct pmt_diriter_t {
	DIR *dir;
	struct dirent **namelist;
	unsigned int num_names;
	unsigned int current_name_idx;
};
1615
/*
 * scandir() filter: keep only directory entries named "telem<N>".
 * Nonzero return keeps the entry.
 */
int pmt_telemdir_filter(const struct dirent *e)
{
	unsigned int unused;

	return sscanf(e->d_name, "telem%u", &unused);
}
1622
/*
 * scandir() comparator: order "telem<N>" entries by their numeric suffix.
 *
 * Fix: the previous "return aidx >= bidx;" violated the qsort()/scandir()
 * comparator contract -- it never returned a negative value and returned
 * positive for equal keys, so the sort order was not well defined.
 * Return a proper three-way result instead.
 */
int pmt_telemdir_sort(const struct dirent **a, const struct dirent **b)
{
	unsigned int aidx = 0, bidx = 0;

	sscanf((*a)->d_name, "telem%u", &aidx);
	sscanf((*b)->d_name, "telem%u", &bidx);

	if (aidx < bidx)
		return -1;
	return aidx > bidx;
}
1632
pmt_diriter_next(struct pmt_diriter_t * iter)1633 const struct dirent *pmt_diriter_next(struct pmt_diriter_t *iter)
1634 {
1635 const struct dirent *ret = NULL;
1636
1637 if (!iter->dir)
1638 return NULL;
1639
1640 if (iter->current_name_idx >= iter->num_names)
1641 return NULL;
1642
1643 ret = iter->namelist[iter->current_name_idx];
1644 ++iter->current_name_idx;
1645
1646 return ret;
1647 }
1648
pmt_diriter_begin(struct pmt_diriter_t * iter,const char * pmt_root_path)1649 const struct dirent *pmt_diriter_begin(struct pmt_diriter_t *iter, const char *pmt_root_path)
1650 {
1651 int num_names = iter->num_names;
1652
1653 if (!iter->dir) {
1654 iter->dir = opendir(pmt_root_path);
1655 if (iter->dir == NULL)
1656 return NULL;
1657
1658 num_names = scandir(pmt_root_path, &iter->namelist, pmt_telemdir_filter, pmt_telemdir_sort);
1659 if (num_names == -1)
1660 return NULL;
1661 }
1662
1663 iter->current_name_idx = 0;
1664 iter->num_names = num_names;
1665
1666 return pmt_diriter_next(iter);
1667 }
1668
pmt_diriter_init(struct pmt_diriter_t * iter)1669 void pmt_diriter_init(struct pmt_diriter_t *iter)
1670 {
1671 memset(iter, 0, sizeof(*iter));
1672 }
1673
pmt_diriter_remove(struct pmt_diriter_t * iter)1674 void pmt_diriter_remove(struct pmt_diriter_t *iter)
1675 {
1676 if (iter->namelist) {
1677 for (unsigned int i = 0; i < iter->num_names; i++) {
1678 free(iter->namelist[i]);
1679 iter->namelist[i] = NULL;
1680 }
1681 }
1682
1683 free(iter->namelist);
1684 iter->namelist = NULL;
1685 iter->num_names = 0;
1686 iter->current_name_idx = 0;
1687
1688 closedir(iter->dir);
1689 iter->dir = NULL;
1690 }
1691
pmt_counter_get_width(const struct pmt_counter * p)1692 unsigned int pmt_counter_get_width(const struct pmt_counter *p)
1693 {
1694 return (p->msb - p->lsb) + 1;
1695 }
1696
pmt_counter_resize_(struct pmt_counter * pcounter,unsigned int new_size)1697 void pmt_counter_resize_(struct pmt_counter *pcounter, unsigned int new_size)
1698 {
1699 struct pmt_domain_info *new_mem;
1700
1701 new_mem = (struct pmt_domain_info *)reallocarray(pcounter->domains, new_size, sizeof(*pcounter->domains));
1702 if (!new_mem) {
1703 fprintf(stderr, "%s: failed to allocate memory for PMT counters\n", __func__);
1704 exit(1);
1705 }
1706
1707 /* Zero initialize just allocated memory. */
1708 const size_t num_new_domains = new_size - pcounter->num_domains;
1709
1710 memset(&new_mem[pcounter->num_domains], 0, num_new_domains * sizeof(*pcounter->domains));
1711
1712 pcounter->num_domains = new_size;
1713 pcounter->domains = new_mem;
1714 }
1715
pmt_counter_resize(struct pmt_counter * pcounter,unsigned int new_size)1716 void pmt_counter_resize(struct pmt_counter *pcounter, unsigned int new_size)
1717 {
1718 /*
1719 * Allocate more memory ahead of time.
1720 *
1721 * Always allocate space for at least 8 elements
1722 * and double the size when growing.
1723 */
1724 if (new_size < 8)
1725 new_size = 8;
1726 new_size = MAX(new_size, pcounter->num_domains * 2);
1727
1728 pmt_counter_resize_(pcounter, new_size);
1729 }
1730
/* Per-CPU-thread sample: one snapshot of this thread's counters */
struct thread_data {
	struct timeval tv_begin;
	struct timeval tv_end;
	struct timeval tv_delta;
	unsigned long long tsc;
	unsigned long long aperf;
	unsigned long long mperf;
	unsigned long long c1;
	unsigned long long instr_count;
	unsigned long long irq_count;
	unsigned long long nmi_count;
	unsigned int smi_count;
	unsigned int cpu_id;
	unsigned int apic_id;
	unsigned int x2apic_id;
	unsigned int flags;
	bool is_atom;
	/* user-added MSR / perf / PMT counters */
	unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
	unsigned long long perf_counter[MAX_ADDED_THREAD_COUNTERS];
	unsigned long long pmt_counter[PMT_MAX_ADDED_THREAD_COUNTERS];
} *thread_even, *thread_odd;

/* Per-core sample: C-state residencies, temperature, per-core RAPL */
struct core_data {
	int base_cpu;
	unsigned long long c3;
	unsigned long long c6;
	unsigned long long c7;
	unsigned long long mc6_us;	/* duplicate as per-core for now, even though per module */
	unsigned int core_temp_c;
	struct rapl_counter core_energy;	/* MSR_CORE_ENERGY_STAT */
	unsigned int core_id;
	unsigned long long core_throt_cnt;
	unsigned long long counter[MAX_ADDED_CORE_COUNTERS];
	unsigned long long perf_counter[MAX_ADDED_CORE_COUNTERS];
	unsigned long long pmt_counter[PMT_MAX_ADDED_CORE_COUNTERS];
} *core_even, *core_odd;

/* Per-package sample: package C-states, graphics, RAPL domains */
struct pkg_data {
	int base_cpu;
	unsigned long long pc2;
	unsigned long long pc3;
	unsigned long long pc6;
	unsigned long long pc7;
	unsigned long long pc8;
	unsigned long long pc9;
	unsigned long long pc10;
	long long cpu_lpi;
	long long sys_lpi;
	unsigned long long pkg_wtd_core_c0;
	unsigned long long pkg_any_core_c0;
	unsigned long long pkg_any_gfxe_c0;
	unsigned long long pkg_both_core_gfxe_c0;
	long long gfx_rc6_ms;
	unsigned int gfx_mhz;
	unsigned int gfx_act_mhz;
	long long sam_mc6_ms;
	unsigned int sam_mhz;
	unsigned int sam_act_mhz;
	unsigned int package_id;
	struct rapl_counter energy_pkg;	/* MSR_PKG_ENERGY_STATUS */
	struct rapl_counter energy_dram;	/* MSR_DRAM_ENERGY_STATUS */
	struct rapl_counter energy_cores;	/* MSR_PP0_ENERGY_STATUS */
	struct rapl_counter energy_gfx;	/* MSR_PP1_ENERGY_STATUS */
	struct rapl_counter rapl_pkg_perf_status;	/* MSR_PKG_PERF_STATUS */
	struct rapl_counter rapl_dram_perf_status;	/* MSR_DRAM_PERF_STATUS */
	unsigned int pkg_temp_c;
	unsigned int uncore_mhz;
	unsigned long long die_c6;
	unsigned long long counter[MAX_ADDED_PACKAGE_COUNTERS];
	unsigned long long perf_counter[MAX_ADDED_PACKAGE_COUNTERS];
	unsigned long long pmt_counter[PMT_MAX_ADDED_PACKAGE_COUNTERS];
} *package_even, *package_odd;
1803
/* Double-buffered sample sets: samples alternate between even and odd */
#define ODD_COUNTERS thread_odd, core_odd, package_odd
#define EVEN_COUNTERS thread_even, core_even, package_even

/* Index into the flat thread array by (pkg, node, core, thread) coordinates */
#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no)	      \
	((thread_base) +						      \
	 ((pkg_no) *							      \
	  topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
	 ((node_no) * topo.cores_per_node * topo.threads_per_core) +	      \
	 ((core_no) * topo.threads_per_core) +				      \
	 (thread_no))

/* Index into the flat core array by (pkg, node, core) coordinates */
#define GET_CORE(core_base, core_no, node_no, pkg_no)		\
	((core_base) +						\
	 ((pkg_no) * topo.nodes_per_pkg * topo.cores_per_node) +	\
	 ((node_no) * topo.cores_per_node) +			\
	 (core_no))

#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)

/*
 * The accumulated sum of MSR is defined as a monotonic
 * increasing MSR, it will be accumulated periodically,
 * despite its register's bit width.
 */
enum {
	IDX_PKG_ENERGY,
	IDX_DRAM_ENERGY,
	IDX_PP0_ENERGY,
	IDX_PP1_ENERGY,
	IDX_PKG_PERF,
	IDX_DRAM_PERF,
	IDX_PSYS_ENERGY,
	IDX_COUNT,
};

int get_msr_sum(int cpu, off_t offset, unsigned long long *msr);

struct msr_sum_array {
	/* get_msr_sum() = sum + (get_msr() - last) */
	struct {
		/*The accumulated MSR value is updated by the timer */
		unsigned long long sum;
		/*The MSR footprint recorded in last timer */
		unsigned long long last;
	} entries[IDX_COUNT];
};

/* The percpu MSR sum array.*/
struct msr_sum_array *per_cpu_msr_sum;
1853
idx_to_offset(int idx)1854 off_t idx_to_offset(int idx)
1855 {
1856 off_t offset;
1857
1858 switch (idx) {
1859 case IDX_PKG_ENERGY:
1860 if (platform->rapl_msrs & RAPL_AMD_F17H)
1861 offset = MSR_PKG_ENERGY_STAT;
1862 else
1863 offset = MSR_PKG_ENERGY_STATUS;
1864 break;
1865 case IDX_DRAM_ENERGY:
1866 offset = MSR_DRAM_ENERGY_STATUS;
1867 break;
1868 case IDX_PP0_ENERGY:
1869 offset = MSR_PP0_ENERGY_STATUS;
1870 break;
1871 case IDX_PP1_ENERGY:
1872 offset = MSR_PP1_ENERGY_STATUS;
1873 break;
1874 case IDX_PKG_PERF:
1875 offset = MSR_PKG_PERF_STATUS;
1876 break;
1877 case IDX_DRAM_PERF:
1878 offset = MSR_DRAM_PERF_STATUS;
1879 break;
1880 case IDX_PSYS_ENERGY:
1881 offset = MSR_PLATFORM_ENERGY_STATUS;
1882 break;
1883 default:
1884 offset = -1;
1885 }
1886 return offset;
1887 }
1888
offset_to_idx(off_t offset)1889 int offset_to_idx(off_t offset)
1890 {
1891 int idx;
1892
1893 switch (offset) {
1894 case MSR_PKG_ENERGY_STATUS:
1895 case MSR_PKG_ENERGY_STAT:
1896 idx = IDX_PKG_ENERGY;
1897 break;
1898 case MSR_DRAM_ENERGY_STATUS:
1899 idx = IDX_DRAM_ENERGY;
1900 break;
1901 case MSR_PP0_ENERGY_STATUS:
1902 idx = IDX_PP0_ENERGY;
1903 break;
1904 case MSR_PP1_ENERGY_STATUS:
1905 idx = IDX_PP1_ENERGY;
1906 break;
1907 case MSR_PKG_PERF_STATUS:
1908 idx = IDX_PKG_PERF;
1909 break;
1910 case MSR_DRAM_PERF_STATUS:
1911 idx = IDX_DRAM_PERF;
1912 break;
1913 case MSR_PLATFORM_ENERGY_STATUS:
1914 idx = IDX_PSYS_ENERGY;
1915 break;
1916 default:
1917 idx = -1;
1918 }
1919 return idx;
1920 }
1921
idx_valid(int idx)1922 int idx_valid(int idx)
1923 {
1924 switch (idx) {
1925 case IDX_PKG_ENERGY:
1926 return platform->rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H);
1927 case IDX_DRAM_ENERGY:
1928 return platform->rapl_msrs & RAPL_DRAM;
1929 case IDX_PP0_ENERGY:
1930 return platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS;
1931 case IDX_PP1_ENERGY:
1932 return platform->rapl_msrs & RAPL_GFX;
1933 case IDX_PKG_PERF:
1934 return platform->rapl_msrs & RAPL_PKG_PERF_STATUS;
1935 case IDX_DRAM_PERF:
1936 return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS;
1937 case IDX_PSYS_ENERGY:
1938 return platform->rapl_msrs & RAPL_PSYS;
1939 default:
1940 return 0;
1941 }
1942 }
1943
/* User-added ("--add") counters, grouped by access method and scope. */
struct sys_counters {
	/* MSR added counters */
	unsigned int added_thread_counters;
	unsigned int added_core_counters;
	unsigned int added_package_counters;
	struct msr_counter *tp;		/* thread-scope list */
	struct msr_counter *cp;		/* core-scope list */
	struct msr_counter *pp;		/* package-scope list */

	/* perf added counters */
	unsigned int added_thread_perf_counters;
	unsigned int added_core_perf_counters;
	unsigned int added_package_perf_counters;
	struct perf_counter_info *perf_tp;
	struct perf_counter_info *perf_cp;
	struct perf_counter_info *perf_pp;

	/* PMT (Platform Monitoring Technology) added counters */
	struct pmt_counter *pmt_tp;
	struct pmt_counter *pmt_cp;
	struct pmt_counter *pmt_pp;
} sys;
1965
free_msr_counters_(struct msr_counter ** pp)1966 static size_t free_msr_counters_(struct msr_counter **pp)
1967 {
1968 struct msr_counter *p = NULL;
1969 size_t num_freed = 0;
1970
1971 while (*pp) {
1972 p = *pp;
1973
1974 if (p->msr_num != 0) {
1975 *pp = p->next;
1976
1977 free(p);
1978 ++num_freed;
1979
1980 continue;
1981 }
1982
1983 pp = &p->next;
1984 }
1985
1986 return num_freed;
1987 }
1988
1989 /*
1990 * Free all added counters accessed via msr.
1991 */
free_sys_msr_counters(void)1992 static void free_sys_msr_counters(void)
1993 {
1994 /* Thread counters */
1995 sys.added_thread_counters -= free_msr_counters_(&sys.tp);
1996
1997 /* Core counters */
1998 sys.added_core_counters -= free_msr_counters_(&sys.cp);
1999
2000 /* Package counters */
2001 sys.added_package_counters -= free_msr_counters_(&sys.pp);
2002 }
2003
/* System-wide averages printed on the summary row. */
struct system_summary {
	struct thread_data threads;
	struct core_data cores;
	struct pkg_data packages;
} average;

/* Counters with platform (whole-system) scope; ping-pong pair like the others. */
struct platform_counters {
	struct rapl_counter energy_psys;	/* MSR_PLATFORM_ENERGY_STATUS */
} platform_counters_odd, platform_counters_even;

/* Per-logical-CPU topology record, indexed by CPU number via cpus[]. */
struct cpu_topology {
	int physical_package_id;
	int die_id;
	int logical_cpu_id;
	int physical_node_id;
	int logical_node_id;	/* 0-based count within the package */
	int physical_core_id;
	int thread_id;
	int type;
	cpu_set_t *put_ids;	/* Processing Unit/Thread IDs */
} *cpus;

/* Counts and maxima describing the machine's topology. */
struct topo_params {
	int num_packages;
	int num_die;
	int num_cpus;
	int num_cores;
	int allowed_packages;	/* limited by --cpu / cgroup restrictions */
	int allowed_cpus;
	int allowed_cores;
	int max_cpu_num;
	int max_core_id;
	int max_package_id;
	int max_die_id;
	int max_node_num;
	int nodes_per_pkg;
	int cores_per_node;
	int threads_per_core;
} topo;

/* Timestamps bracketing the even/odd snapshots, and their difference. */
struct timeval tv_even, tv_odd, tv_delta;

int *irq_column_2_cpu;		/* /proc/interrupts column numbers */
int *irqs_per_cpu;		/* indexed by cpu_num */
int *nmi_per_cpu;		/* indexed by cpu_num */

void setup_all_buffers(bool startup);

/* Path used for the SYS%LPI counter; chosen from the two candidates below. */
char *sys_lpi_file;
char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec";
2055
cpu_is_not_present(int cpu)2056 int cpu_is_not_present(int cpu)
2057 {
2058 return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
2059 }
2060
cpu_is_not_allowed(int cpu)2061 int cpu_is_not_allowed(int cpu)
2062 {
2063 return !CPU_ISSET_S(cpu, cpu_allowed_setsize, cpu_allowed_set);
2064 }
2065
2066 /*
2067 * run func(thread, core, package) in topology order
2068 * skip non-present cpus
2069 */
2070
for_all_cpus(int (func)(struct thread_data *,struct core_data *,struct pkg_data *),struct thread_data * thread_base,struct core_data * core_base,struct pkg_data * pkg_base)2071 int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *),
2072 struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
2073 {
2074 int retval, pkg_no, core_no, thread_no, node_no;
2075
2076 retval = 0;
2077
2078 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
2079 for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) {
2080 for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
2081 for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) {
2082 struct thread_data *t;
2083 struct core_data *c;
2084 struct pkg_data *p;
2085 t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
2086
2087 if (cpu_is_not_allowed(t->cpu_id))
2088 continue;
2089
2090 c = GET_CORE(core_base, core_no, node_no, pkg_no);
2091 p = GET_PKG(pkg_base, pkg_no);
2092
2093 retval |= func(t, c, p);
2094 }
2095 }
2096 }
2097 }
2098 return retval;
2099 }
2100
is_cpu_first_thread_in_core(struct thread_data * t,struct core_data * c,struct pkg_data * p)2101 int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2102 {
2103 UNUSED(p);
2104
2105 return ((int)t->cpu_id == c->base_cpu || c->base_cpu < 0);
2106 }
2107
is_cpu_first_core_in_package(struct thread_data * t,struct core_data * c,struct pkg_data * p)2108 int is_cpu_first_core_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2109 {
2110 UNUSED(c);
2111
2112 return ((int)t->cpu_id == p->base_cpu || p->base_cpu < 0);
2113 }
2114
/* True only for the single CPU that leads both its core and its package. */
int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	if (!is_cpu_first_thread_in_core(t, c, p))
		return 0;

	return is_cpu_first_core_in_package(t, c, p);
}
2119
cpu_migrate(int cpu)2120 int cpu_migrate(int cpu)
2121 {
2122 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
2123 CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
2124 if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
2125 return -1;
2126 else
2127 return 0;
2128 }
2129
get_msr_fd(int cpu)2130 int get_msr_fd(int cpu)
2131 {
2132 char pathname[32];
2133 int fd;
2134
2135 fd = fd_percpu[cpu];
2136
2137 if (fd)
2138 return fd;
2139
2140 sprintf(pathname, "/dev/cpu/%d/msr", cpu);
2141 fd = open(pathname, O_RDONLY);
2142 if (fd < 0)
2143 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, "
2144 "or run with --no-msr, or run as root", pathname);
2145
2146 fd_percpu[cpu] = fd;
2147
2148 return fd;
2149 }
2150
bic_disable_msr_access(void)2151 static void bic_disable_msr_access(void)
2152 {
2153 const unsigned long bic_msrs = BIC_Mod_c6 | BIC_CoreTmp |
2154 BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_PkgTmp;
2155
2156 bic_enabled &= ~bic_msrs;
2157
2158 free_sys_msr_counters();
2159 }
2160
/* Thin wrapper over the perf_event_open(2) syscall (glibc has no stub). */
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
{
	long ret;

	assert(!no_perf);

	ret = syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);

	return ret;
}
2167
open_perf_counter(int cpu,unsigned int type,unsigned int config,int group_fd,__u64 read_format)2168 static long open_perf_counter(int cpu, unsigned int type, unsigned int config, int group_fd, __u64 read_format)
2169 {
2170 struct perf_event_attr attr;
2171 const pid_t pid = -1;
2172 const unsigned long flags = 0;
2173
2174 assert(!no_perf);
2175
2176 memset(&attr, 0, sizeof(struct perf_event_attr));
2177
2178 attr.type = type;
2179 attr.size = sizeof(struct perf_event_attr);
2180 attr.config = config;
2181 attr.disabled = 0;
2182 attr.sample_type = PERF_SAMPLE_IDENTIFIER;
2183 attr.read_format = read_format;
2184
2185 const int fd = perf_event_open(&attr, pid, cpu, group_fd, flags);
2186
2187 return fd;
2188 }
2189
get_instr_count_fd(int cpu)2190 int get_instr_count_fd(int cpu)
2191 {
2192 if (fd_instr_count_percpu[cpu])
2193 return fd_instr_count_percpu[cpu];
2194
2195 fd_instr_count_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
2196
2197 return fd_instr_count_percpu[cpu];
2198 }
2199
/*
 * Read the 8-byte MSR at @offset on @cpu into *msr.
 * Exits via err() on a short or failed read; returns 0 on success.
 */
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
	ssize_t nread;

	assert(!no_msr);

	nread = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
	if (nread != sizeof(*msr))
		err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);

	return 0;
}
2213
/*
 * Probe whether the RAPL MSR at @offset is usable on @cpu.
 * Returns 0 when the probe succeeds, 1 when it fails.
 */
int probe_rapl_msr(int cpu, off_t offset, int index)
{
	ssize_t retval;
	unsigned long long value;

	assert(!no_msr);

	retval = pread(get_msr_fd(cpu), &value, sizeof(value), offset);

	/* if the read failed, the probe fails */
	if (retval != sizeof(value))
		return 1;

	/* If an Energy Status Counter MSR returns 0, the probe fails */
	switch (index) {
	case RAPL_RCI_INDEX_ENERGY_PKG:
	case RAPL_RCI_INDEX_ENERGY_CORES:
	case RAPL_RCI_INDEX_DRAM:
	case RAPL_RCI_INDEX_GFX:
	case RAPL_RCI_INDEX_ENERGY_PLATFORM:
		if (value == 0)
			return 1;
		break;
	default:
		/* PKG,DRAM_PERF_STATUS MSRs, can return any value */
		break;
	}

	return 0;
}
2241
2242 /* Convert CPU ID to domain ID for given added perf counter. */
cpu_to_domain(const struct perf_counter_info * pc,int cpu)2243 unsigned int cpu_to_domain(const struct perf_counter_info *pc, int cpu)
2244 {
2245 switch (pc->scope) {
2246 case SCOPE_CPU:
2247 return cpu;
2248
2249 case SCOPE_CORE:
2250 return cpus[cpu].physical_core_id;
2251
2252 case SCOPE_PACKAGE:
2253 return cpus[cpu].physical_package_id;
2254 }
2255
2256 __builtin_unreachable();
2257 }
2258
#define MAX_DEFERRED 16
/* --show/--hide names not recognized at parse time; retried later as
 * user-added counters.  These point into the caller's argv strings. */
char *deferred_add_names[MAX_DEFERRED];
char *deferred_skip_names[MAX_DEFERRED];
int deferred_add_index;
int deferred_skip_index;

/*
 * HIDE_LIST - hide this list of counters, show the rest [default]
 * SHOW_LIST - show this list of counters, hide the rest
 */
enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST;
2270
/* Print the command-line usage summary to the output stream. */
void help(void)
{
	fprintf(outf,
		"Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
		"\n"
		"Turbostat forks the specified COMMAND and prints statistics\n"
		"when COMMAND completes.\n"
		"If no COMMAND is specified, turbostat wakes every 5-seconds\n"
		"to print statistics, until interrupted.\n"
		" -a, --add counter\n"
		" add a counter\n"
		" eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
		" eg. --add perf/cstate_pkg/c2-residency,package,delta,percent,perfPC2\n"
		" eg. --add pmt,name=XTAL,type=raw,domain=package0,offset=0,lsb=0,msb=63,guid=0x1a067102\n"
		" -c, --cpu cpu-set\n"
		" limit output to summary plus cpu-set:\n"
		" {core | package | j,k,l..m,n-p }\n"
		" -d, --debug\n"
		" displays usec, Time_Of_Day_Seconds and more debugging\n"
		" debug messages are printed to stderr\n"
		" -D, --Dump\n"
		" displays the raw counter values\n"
		" -e, --enable [all | column]\n"
		" shows all or the specified disabled column\n"
		" -f, --force\n"
		" force load turbostat with minimum default features on unsupported platforms.\n"
		" -H, --hide [column | column,column,...]\n"
		" hide the specified column(s)\n"
		" -i, --interval sec.subsec\n"
		" override default 5-second measurement interval\n"
		" -J, --Joules\n"
		" displays energy in Joules instead of Watts\n"
		" -l, --list\n"
		" list column headers only\n"
		" -M, --no-msr\n"
		" disable all uses of the MSR driver\n"
		" -P, --no-perf\n"
		" disable all uses of the perf API\n"
		" -n, --num_iterations num\n"
		" number of the measurement iterations\n"
		" -N, --header_iterations num\n"
		" print header every num iterations\n"
		" -o, --out file\n"
		" create or truncate \"file\" for all output\n"
		" -q, --quiet\n"
		" skip decoding system configuration header\n"
		" -s, --show [column | column,column,...]\n"
		" show only the specified column(s)\n"
		" -S, --Summary\n"
		" limits output to 1-line system summary per interval\n"
		" -T, --TCC temperature\n"
		" sets the Thermal Control Circuit temperature in\n"
		" degrees Celsius\n"
		" -h, --help\n"
		" print this help message\n"
		" -v, --version\n"
		" print version information\n\nFor more help, run \"man turbostat\"\n");
}
2329
2330 /*
2331 * bic_lookup
2332 * for all the strings in comma separate name_list,
2333 * set the approprate bit in return value.
2334 */
/*
 * Translate a comma-separated column-name list into a BIC bitmask.
 * Commas in name_list are overwritten with NULs (the list is split in
 * place), and unrecognized names are remembered in deferred_*_names[]
 * as pointers INTO name_list -- the caller's buffer must stay alive.
 */
unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
{
	unsigned int i;
	unsigned long long retval = 0;

	while (name_list) {
		char *comma;

		comma = strchr(name_list, ',');

		/* terminate the current token in place */
		if (comma)
			*comma = '\0';

		for (i = 0; i < MAX_BIC; ++i) {
			if (!strcmp(name_list, bic[i].name)) {
				retval |= (1ULL << i);
				break;
			}
			/* group aliases; when matched, the loop exits on its first pass */
			if (!strcmp(name_list, "all")) {
				retval |= ~0;
				break;
			} else if (!strcmp(name_list, "topology")) {
				retval |= BIC_TOPOLOGY;
				break;
			} else if (!strcmp(name_list, "power")) {
				retval |= BIC_THERMAL_PWR;
				break;
			} else if (!strcmp(name_list, "idle")) {
				retval |= BIC_IDLE;
				break;
			} else if (!strcmp(name_list, "frequency")) {
				retval |= BIC_FREQUENCY;
				break;
			} else if (!strcmp(name_list, "other")) {
				retval |= BIC_OTHER;
				break;
			}

		}
		/* no built-in matched: defer the token for later counter parsing */
		if (i == MAX_BIC) {
			if (mode == SHOW_LIST) {
				deferred_add_names[deferred_add_index++] = name_list;
				if (deferred_add_index >= MAX_DEFERRED) {
					fprintf(stderr, "More than max %d un-recognized --add options '%s'\n",
						MAX_DEFERRED, name_list);
					help();
					exit(1);
				}
			} else {
				deferred_skip_names[deferred_skip_index++] = name_list;
				if (debug)
					fprintf(stderr, "deferred \"%s\"\n", name_list);
				if (deferred_skip_index >= MAX_DEFERRED) {
					fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n",
						MAX_DEFERRED, name_list);
					help();
					exit(1);
				}
			}
		}

		/* advance past the consumed token */
		name_list = comma;
		if (name_list)
			name_list++;

	}
	return retval;
}
2403
/*
 * Emit one header row of column names into the output buffer, in the same
 * order format_counters() emits values.  @delim separates columns; the
 * `printed` flag suppresses the delimiter before the first column.
 */
void print_header(char *delim)
{
	struct msr_counter *mp;
	struct perf_counter_info *pp;
	struct pmt_counter *ppmt;
	int printed = 0;

	/* timestamp and topology columns */
	if (DO_BIC(BIC_USEC))
		outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
	if (DO_BIC(BIC_TOD))
		outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Package))
		outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Die))
		outp += sprintf(outp, "%sDie", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Node))
		outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Core))
		outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CPU))
		outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
	if (DO_BIC(BIC_APIC))
		outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
	if (DO_BIC(BIC_X2APIC))
		outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Avg_MHz))
		outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Busy))
		outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Bzy_MHz))
		outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
	if (DO_BIC(BIC_TSC_MHz))
		outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));

	if (DO_BIC(BIC_IPC))
		outp += sprintf(outp, "%sIPC", (printed++ ? delim : ""));

	/* counter sums get an extra-wide column when values can overflow 8 chars */
	if (DO_BIC(BIC_IRQ)) {
		if (sums_need_wide_columns)
			outp += sprintf(outp, "%s IRQ", (printed++ ? delim : ""));
		else
			outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
	}
	if (DO_BIC(BIC_NMI)) {
		if (sums_need_wide_columns)
			outp += sprintf(outp, "%s NMI", (printed++ ? delim : ""));
		else
			outp += sprintf(outp, "%sNMI", (printed++ ? delim : ""));
	}

	if (DO_BIC(BIC_SMI))
		outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));

	/* user-added thread-scope MSR counters */
	for (mp = sys.tp; mp; mp = mp->next) {

		if (mp->format == FORMAT_RAW) {
			if (mp->width == 64)
				outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
			else
				outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
		} else {
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
			else
				outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
		}
	}

	/* user-added thread-scope perf counters */
	for (pp = sys.perf_tp; pp; pp = pp->next) {

		if (pp->format == FORMAT_RAW) {
			if (pp->width == 64)
				outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name);
			else
				outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name);
		} else {
			if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name);
			else
				outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name);
		}
	}

	/* user-added thread-scope PMT counters */
	ppmt = sys.pmt_tp;
	while (ppmt) {
		switch (ppmt->type) {
		case PMT_TYPE_RAW:
			if (pmt_counter_get_width(ppmt) <= 32)
				outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name);
			else
				outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name);

			break;

		case PMT_TYPE_XTAL_TIME:
		case PMT_TYPE_TCORE_CLOCK:
			outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name);
			break;
		}

		ppmt = ppmt->next;
	}

	/* core C-states and core thermals */
	if (DO_BIC(BIC_CPU_c1))
		outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CPU_c3))
		outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CPU_c6))
		outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CPU_c7))
		outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));

	if (DO_BIC(BIC_Mod_c6))
		outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));

	if (DO_BIC(BIC_CoreTmp))
		outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));

	if (DO_BIC(BIC_CORE_THROT_CNT))
		outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : ""));

	/* per-core RAPL, only when the platform reports energy per core */
	if (platform->rapl_msrs && !rapl_joules) {
		if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
			outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
	} else if (platform->rapl_msrs && rapl_joules) {
		if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
			outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
	}

	/* user-added core-scope MSR counters */
	/* NOTE(review): this loop (and the sys.pp loop below) passes delim
	 * unconditionally instead of (printed++ ? delim : "") as the sys.tp
	 * loop does -- confirm the asymmetry is intended before changing. */
	for (mp = sys.cp; mp; mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 64)
				outp += sprintf(outp, "%s%18.18s", delim, mp->name);
			else
				outp += sprintf(outp, "%s%10.10s", delim, mp->name);
		} else {
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8s", delim, mp->name);
			else
				outp += sprintf(outp, "%s%s", delim, mp->name);
		}
	}

	/* user-added core-scope perf counters */
	for (pp = sys.perf_cp; pp; pp = pp->next) {

		if (pp->format == FORMAT_RAW) {
			if (pp->width == 64)
				outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name);
			else
				outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name);
		} else {
			if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name);
			else
				outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name);
		}
	}

	/* user-added core-scope PMT counters */
	ppmt = sys.pmt_cp;
	while (ppmt) {
		switch (ppmt->type) {
		case PMT_TYPE_RAW:
			if (pmt_counter_get_width(ppmt) <= 32)
				outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name);
			else
				outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name);

			break;

		case PMT_TYPE_XTAL_TIME:
		case PMT_TYPE_TCORE_CLOCK:
			outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name);
			break;
		}

		ppmt = ppmt->next;
	}

	/* package-scope columns */
	if (DO_BIC(BIC_PkgTmp))
		outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));

	if (DO_BIC(BIC_GFX_rc6))
		outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));

	if (DO_BIC(BIC_GFXMHz))
		outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));

	if (DO_BIC(BIC_GFXACTMHz))
		outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : ""));

	if (DO_BIC(BIC_SAM_mc6))
		outp += sprintf(outp, "%sSAM%%mc6", (printed++ ? delim : ""));

	if (DO_BIC(BIC_SAMMHz))
		outp += sprintf(outp, "%sSAMMHz", (printed++ ? delim : ""));

	if (DO_BIC(BIC_SAMACTMHz))
		outp += sprintf(outp, "%sSAMAMHz", (printed++ ? delim : ""));

	if (DO_BIC(BIC_Totl_c0))
		outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Any_c0))
		outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
	if (DO_BIC(BIC_GFX_c0))
		outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CPUGFX))
		outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));

	/* package C-states and low-power-idle residencies */
	if (DO_BIC(BIC_Pkgpc2))
		outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc3))
		outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc6))
		outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc7))
		outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc8))
		outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc9))
		outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Pkgpc10))
		outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Diec6))
		outp += sprintf(outp, "%sDie%%c6", (printed++ ? delim : ""));
	if (DO_BIC(BIC_CPU_LPI))
		outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
	if (DO_BIC(BIC_SYS_LPI))
		outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));

	/* package RAPL, in Watts or Joules per the --Joules flag */
	if (platform->rapl_msrs && !rapl_joules) {
		if (DO_BIC(BIC_PkgWatt))
			outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
		if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
			outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
		if (DO_BIC(BIC_GFXWatt))
			outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
		if (DO_BIC(BIC_RAMWatt))
			outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
		if (DO_BIC(BIC_PKG__))
			outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
		if (DO_BIC(BIC_RAM__))
			outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
	} else if (platform->rapl_msrs && rapl_joules) {
		if (DO_BIC(BIC_Pkg_J))
			outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
		if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
			outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
		if (DO_BIC(BIC_GFX_J))
			outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
		if (DO_BIC(BIC_RAM_J))
			outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
		if (DO_BIC(BIC_PKG__))
			outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
		if (DO_BIC(BIC_RAM__))
			outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
	}
	if (DO_BIC(BIC_UNCORE_MHZ))
		outp += sprintf(outp, "%sUncMHz", (printed++ ? delim : ""));

	/* user-added package-scope MSR counters (see NOTE above re: delim) */
	for (mp = sys.pp; mp; mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 64)
				outp += sprintf(outp, "%s%18.18s", delim, mp->name);
			else if (mp->width == 32)
				outp += sprintf(outp, "%s%10.10s", delim, mp->name);
			else
				outp += sprintf(outp, "%s%7.7s", delim, mp->name);
		} else {
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8s", delim, mp->name);
			else
				outp += sprintf(outp, "%s%7.7s", delim, mp->name);
		}
	}

	/* user-added package-scope perf counters */
	for (pp = sys.perf_pp; pp; pp = pp->next) {

		if (pp->format == FORMAT_RAW) {
			if (pp->width == 64)
				outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name);
			else
				outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name);
		} else {
			if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name);
			else
				outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name);
		}
	}

	/* user-added package-scope PMT counters */
	ppmt = sys.pmt_pp;
	while (ppmt) {
		switch (ppmt->type) {
		case PMT_TYPE_RAW:
			if (pmt_counter_get_width(ppmt) <= 32)
				outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name);
			else
				outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name);

			break;

		case PMT_TYPE_XTAL_TIME:
		case PMT_TYPE_TCORE_CLOCK:
			outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name);
			break;
		}

		ppmt = ppmt->next;
	}

	/* platform-scope (whole system) energy */
	if (DO_BIC(BIC_SysWatt))
		outp += sprintf(outp, "%sSysWatt", (printed++ ? delim : ""));
	if (DO_BIC(BIC_Sys_J))
		outp += sprintf(outp, "%sSys_J", (printed++ ? delim : ""));

	outp += sprintf(outp, "\n");
}
2721
dump_counters(struct thread_data * t,struct core_data * c,struct pkg_data * p)2722 int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2723 {
2724 int i;
2725 struct msr_counter *mp;
2726 struct platform_counters *pplat_cnt = p == package_odd ? &platform_counters_odd : &platform_counters_even;
2727
2728 outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
2729
2730 if (t) {
2731 outp += sprintf(outp, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags);
2732 outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
2733 outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
2734 outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
2735 outp += sprintf(outp, "c1: %016llX\n", t->c1);
2736
2737 if (DO_BIC(BIC_IPC))
2738 outp += sprintf(outp, "IPC: %lld\n", t->instr_count);
2739
2740 if (DO_BIC(BIC_IRQ))
2741 outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
2742 if (DO_BIC(BIC_NMI))
2743 outp += sprintf(outp, "IRQ: %lld\n", t->nmi_count);
2744 if (DO_BIC(BIC_SMI))
2745 outp += sprintf(outp, "SMI: %d\n", t->smi_count);
2746
2747 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
2748 outp +=
2749 sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
2750 t->counter[i], mp->sp->path);
2751 }
2752 }
2753
2754 if (c && is_cpu_first_thread_in_core(t, c, p)) {
2755 outp += sprintf(outp, "core: %d\n", c->core_id);
2756 outp += sprintf(outp, "c3: %016llX\n", c->c3);
2757 outp += sprintf(outp, "c6: %016llX\n", c->c6);
2758 outp += sprintf(outp, "c7: %016llX\n", c->c7);
2759 outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
2760 outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt);
2761
2762 const unsigned long long energy_value = c->core_energy.raw_value * c->core_energy.scale;
2763 const double energy_scale = c->core_energy.scale;
2764
2765 if (c->core_energy.unit == RAPL_UNIT_JOULES)
2766 outp += sprintf(outp, "Joules: %0llX (scale: %lf)\n", energy_value, energy_scale);
2767
2768 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
2769 outp +=
2770 sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
2771 c->counter[i], mp->sp->path);
2772 }
2773 outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
2774 }
2775
2776 if (p && is_cpu_first_core_in_package(t, c, p)) {
2777 outp += sprintf(outp, "package: %d\n", p->package_id);
2778
2779 outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
2780 outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
2781 outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
2782 outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
2783
2784 outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
2785 if (DO_BIC(BIC_Pkgpc3))
2786 outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
2787 if (DO_BIC(BIC_Pkgpc6))
2788 outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
2789 if (DO_BIC(BIC_Pkgpc7))
2790 outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
2791 outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
2792 outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
2793 outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
2794 outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
2795 outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
2796 outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg.raw_value);
2797 outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores.raw_value);
2798 outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx.raw_value);
2799 outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram.raw_value);
2800 outp += sprintf(outp, "Joules PSYS: %0llX\n", pplat_cnt->energy_psys.raw_value);
2801 outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status.raw_value);
2802 outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status.raw_value);
2803 outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
2804
2805 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
2806 outp +=
2807 sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
2808 p->counter[i], mp->sp->path);
2809 }
2810 }
2811
2812 outp += sprintf(outp, "\n");
2813
2814 return 0;
2815 }
2816
rapl_counter_get_value(const struct rapl_counter * c,enum rapl_unit desired_unit,double interval)2817 double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desired_unit, double interval)
2818 {
2819 assert(desired_unit != RAPL_UNIT_INVALID);
2820
2821 /*
2822 * For now we don't expect anything other than joules,
2823 * so just simplify the logic.
2824 */
2825 assert(c->unit == RAPL_UNIT_JOULES);
2826
2827 const double scaled = c->raw_value * c->scale;
2828
2829 if (desired_unit == RAPL_UNIT_WATTS)
2830 return scaled / interval;
2831 return scaled;
2832 }
2833
2834 /*
2835 * column formatting convention & formats
2836 */
/*
 * format_counters() - append one tab-delimited output row for the given
 * thread/core/package triple to the global output buffer (via outp).
 *
 * The same routine formats both the summary ("average") row and per-CPU
 * rows; topology columns on the summary row are printed as "-".
 * Per-core columns are emitted only for the first thread in each core,
 * and per-package columns only for the first core in each package.
 *
 * Always returns 0, so it can be used as a for_all_cpus() callback.
 */
int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	static int count;

	struct platform_counters *pplat_cnt = NULL;
	double interval_float, tsc;
	char *fmt8;
	int i;
	struct msr_counter *mp;
	struct perf_counter_info *pp;
	struct pmt_counter *ppmt;
	char *delim = "\t";
	int printed = 0;	/* columns emitted so far; 0 suppresses the leading delimiter */

	/* platform (PSYS) counters are double-buffered; pick this interval's copy */
	if (t == &average.threads) {
		pplat_cnt = count & 1 ? &platform_counters_odd : &platform_counters_even;
		++count;
	}

	/* if showing only 1st thread in core and this isn't one, bail out */
	if (show_core_only && !is_cpu_first_thread_in_core(t, c, p))
		return 0;

	/* if showing only 1st thread in pkg and this isn't one, bail out */
	if (show_pkg_only && !is_cpu_first_core_in_package(t, c, p))
		return 0;

	/*if not summary line and --cpu is used */
	if ((t != &average.threads) && (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
		return 0;

	if (DO_BIC(BIC_USEC)) {
		/* on each row, print how many usec each timestamp took to gather */
		struct timeval tv;

		timersub(&t->tv_end, &t->tv_begin, &tv);
		outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
	}

	/* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */
	if (DO_BIC(BIC_TOD))
		outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);

	interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec / 1000000.0;

	/* tweaked TSC is the denominator for all residency percentages below */
	tsc = t->tsc * tsc_tweak;

	/* topo columns, print blanks on 1st (average) line */
	if (t == &average.threads) {
		if (DO_BIC(BIC_Package))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		if (DO_BIC(BIC_Die))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		if (DO_BIC(BIC_Node))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		if (DO_BIC(BIC_Core))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		if (DO_BIC(BIC_CPU))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		if (DO_BIC(BIC_APIC))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		if (DO_BIC(BIC_X2APIC))
			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
	} else {
		if (DO_BIC(BIC_Package)) {
			if (p)
				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id);
			else
				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		}
		if (DO_BIC(BIC_Die)) {
			if (c)
				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id);
			else
				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		}
		if (DO_BIC(BIC_Node)) {
			if (t)
				outp += sprintf(outp, "%s%d",
						(printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id);
			else
				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		}
		if (DO_BIC(BIC_Core)) {
			if (c)
				outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
			else
				outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
		}
		if (DO_BIC(BIC_CPU))
			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
		if (DO_BIC(BIC_APIC))
			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
		if (DO_BIC(BIC_X2APIC))
			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
	}

	/* Avg_MHz: aperf cycles over wall-clock time */
	if (DO_BIC(BIC_Avg_MHz))
		outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float);

	/* Busy%: mperf (non-halted cycles) as a fraction of TSC */
	if (DO_BIC(BIC_Busy))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf / tsc);

	if (DO_BIC(BIC_Bzy_MHz)) {
		if (has_base_hz)
			outp +=
			    sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
		else
			outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
					tsc / units * t->aperf / t->mperf / interval_float);
	}

	if (DO_BIC(BIC_TSC_MHz))
		outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc / units / interval_float);

	/* IPC: retired instructions per aperf cycle */
	if (DO_BIC(BIC_IPC))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1.0 * t->instr_count / t->aperf);

	/* IRQ */
	if (DO_BIC(BIC_IRQ)) {
		if (sums_need_wide_columns)
			outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
		else
			outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
	}

	/* NMI */
	if (DO_BIC(BIC_NMI)) {
		if (sums_need_wide_columns)
			outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->nmi_count);
		else
			outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->nmi_count);
	}

	/* SMI */
	if (DO_BIC(BIC_SMI))
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);

	/* Added counters */
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 32)
				outp +=
				    sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]);
			else
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
		} else if (mp->format == FORMAT_DELTA) {
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
			else
				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
		} else if (mp->format == FORMAT_PERCENT) {
			/* USEC counters: usec/interval/10000 == percent of the interval */
			if (mp->type == COUNTER_USEC)
				outp +=
				    sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
					    t->counter[i] / interval_float / 10000);
			else
				outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i] / tsc);
		}
	}

	/* Added perf counters */
	for (i = 0, pp = sys.perf_tp; pp; ++i, pp = pp->next) {
		if (pp->format == FORMAT_RAW) {
			if (pp->width == 32)
				outp +=
				    sprintf(outp, "%s0x%08x", (printed++ ? delim : ""),
					    (unsigned int)t->perf_counter[i]);
			else
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->perf_counter[i]);
		} else if (pp->format == FORMAT_DELTA) {
			if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->perf_counter[i]);
			else
				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->perf_counter[i]);
		} else if (pp->format == FORMAT_PERCENT) {
			if (pp->type == COUNTER_USEC)
				outp +=
				    sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
					    t->perf_counter[i] / interval_float / 10000);
			else
				outp +=
				    sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->perf_counter[i] / tsc);
		}
	}

	/* Added PMT (telemetry) counters, thread scope */
	for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) {
		const unsigned long value_raw = t->pmt_counter[i];
		double value_converted;
		switch (ppmt->type) {
		case PMT_TYPE_RAW:
			if (pmt_counter_get_width(ppmt) <= 32)
				outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""),
						(unsigned int)t->pmt_counter[i]);
			else
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->pmt_counter[i]);

			break;

		case PMT_TYPE_XTAL_TIME:
			value_converted = 100.0 * value_raw / crystal_hz / interval_float;
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
			break;

		case PMT_TYPE_TCORE_CLOCK:
			value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float;
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
		}
	}

	/* C1 */
	if (DO_BIC(BIC_CPU_c1))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc);

	/* print per-core data only for 1st thread in core */
	if (!is_cpu_first_thread_in_core(t, c, p))
		goto done;

	if (DO_BIC(BIC_CPU_c3))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3 / tsc);
	if (DO_BIC(BIC_CPU_c6))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6 / tsc);
	if (DO_BIC(BIC_CPU_c7))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7 / tsc);

	/* Mod%c6 */
	if (DO_BIC(BIC_Mod_c6))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);

	if (DO_BIC(BIC_CoreTmp))
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);

	/* Core throttle count */
	if (DO_BIC(BIC_CORE_THROT_CNT))
		outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt);

	/* Added MSR counters, core scope */
	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 32)
				outp +=
				    sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]);
			else
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
		} else if (mp->format == FORMAT_DELTA) {
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
			else
				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
		} else if (mp->format == FORMAT_PERCENT) {
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i] / tsc);
		}
	}

	/* Added perf counters, core scope */
	for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) {
		if (pp->format == FORMAT_RAW) {
			if (pp->width == 32)
				outp +=
				    sprintf(outp, "%s0x%08x", (printed++ ? delim : ""),
					    (unsigned int)c->perf_counter[i]);
			else
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->perf_counter[i]);
		} else if (pp->format == FORMAT_DELTA) {
			if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->perf_counter[i]);
			else
				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->perf_counter[i]);
		} else if (pp->format == FORMAT_PERCENT) {
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->perf_counter[i] / tsc);
		}
	}

	/* Added PMT counters, core scope */
	for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) {
		const unsigned long value_raw = c->pmt_counter[i];
		double value_converted;
		switch (ppmt->type) {
		case PMT_TYPE_RAW:
			if (pmt_counter_get_width(ppmt) <= 32)
				outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""),
						(unsigned int)c->pmt_counter[i]);
			else
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->pmt_counter[i]);

			break;

		case PMT_TYPE_XTAL_TIME:
			value_converted = 100.0 * value_raw / crystal_hz / interval_float;
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
			break;

		case PMT_TYPE_TCORE_CLOCK:
			value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float;
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
		}
	}

	/* shared format for all RAPL Watts/Joules columns */
	fmt8 = "%s%.2f";

	if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
		outp +=
		    sprintf(outp, fmt8, (printed++ ? delim : ""),
			    rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float));
	if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
				rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float));

	/* print per-package data only for 1st core in package */
	if (!is_cpu_first_core_in_package(t, c, p))
		goto done;

	/* PkgTmp */
	if (DO_BIC(BIC_PkgTmp))
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);

	/* GFXrc6 */
	if (DO_BIC(BIC_GFX_rc6)) {
		if (p->gfx_rc6_ms == -1) {	/* detect GFX counter reset */
			outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
		} else {
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
					p->gfx_rc6_ms / 10.0 / interval_float);
		}
	}

	/* GFXMHz */
	if (DO_BIC(BIC_GFXMHz))
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);

	/* GFXACTMHz */
	if (DO_BIC(BIC_GFXACTMHz))
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz);

	/* SAMmc6 */
	if (DO_BIC(BIC_SAM_mc6)) {
		if (p->sam_mc6_ms == -1) {	/* detect GFX counter reset */
			outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
		} else {
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
					p->sam_mc6_ms / 10.0 / interval_float);
		}
	}

	/* SAMMHz */
	if (DO_BIC(BIC_SAMMHz))
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_mhz);

	/* SAMACTMHz */
	if (DO_BIC(BIC_SAMACTMHz))
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_act_mhz);

	/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
	if (DO_BIC(BIC_Totl_c0))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc);
	if (DO_BIC(BIC_Any_c0))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0 / tsc);
	if (DO_BIC(BIC_GFX_c0))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0 / tsc);
	if (DO_BIC(BIC_CPUGFX))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0 / tsc);

	/* package C-state residency percentages */
	if (DO_BIC(BIC_Pkgpc2))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2 / tsc);
	if (DO_BIC(BIC_Pkgpc3))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3 / tsc);
	if (DO_BIC(BIC_Pkgpc6))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6 / tsc);
	if (DO_BIC(BIC_Pkgpc7))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7 / tsc);
	if (DO_BIC(BIC_Pkgpc8))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8 / tsc);
	if (DO_BIC(BIC_Pkgpc9))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9 / tsc);
	if (DO_BIC(BIC_Pkgpc10))
		outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc);

	if (DO_BIC(BIC_Diec6))
		outp +=
		    sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->die_c6 / crystal_hz / interval_float);

	/* low-power-idle residencies; negative deltas indicate a counter problem */
	if (DO_BIC(BIC_CPU_LPI)) {
		if (p->cpu_lpi >= 0)
			outp +=
			    sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
				    100.0 * p->cpu_lpi / 1000000.0 / interval_float);
		else
			outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
	}
	if (DO_BIC(BIC_SYS_LPI)) {
		if (p->sys_lpi >= 0)
			outp +=
			    sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
				    100.0 * p->sys_lpi / 1000000.0 / interval_float);
		else
			outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
	}

	/* package-scope RAPL columns (Watts, then Joules, then perf-status) */
	if (DO_BIC(BIC_PkgWatt))
		outp +=
		    sprintf(outp, fmt8, (printed++ ? delim : ""),
			    rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float));
	if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
		outp +=
		    sprintf(outp, fmt8, (printed++ ? delim : ""),
			    rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float));
	if (DO_BIC(BIC_GFXWatt))
		outp +=
		    sprintf(outp, fmt8, (printed++ ? delim : ""),
			    rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float));
	if (DO_BIC(BIC_RAMWatt))
		outp +=
		    sprintf(outp, fmt8, (printed++ ? delim : ""),
			    rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float));
	if (DO_BIC(BIC_Pkg_J))
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
				rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float));
	if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
				rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float));
	if (DO_BIC(BIC_GFX_J))
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
				rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float));
	if (DO_BIC(BIC_RAM_J))
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
				rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float));
	if (DO_BIC(BIC_PKG__))
		outp +=
		    sprintf(outp, fmt8, (printed++ ? delim : ""),
			    rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float));
	if (DO_BIC(BIC_RAM__))
		outp +=
		    sprintf(outp, fmt8, (printed++ ? delim : ""),
			    rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float));
	/* UncMHz */
	if (DO_BIC(BIC_UNCORE_MHZ))
		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz);

	/* Added MSR counters, package scope */
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW) {
			if (mp->width == 32)
				outp +=
				    sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]);
			else
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
		} else if (mp->format == FORMAT_DELTA) {
			if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
			else
				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
		} else if (mp->format == FORMAT_PERCENT) {
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i] / tsc);
		} else if (mp->type == COUNTER_K2M)	/* kHz reading shown in MHz */
			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->counter[i] / 1000);
	}

	/* Added perf counters, package scope */
	for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) {
		if (pp->format == FORMAT_RAW) {
			if (pp->width == 32)
				outp +=
				    sprintf(outp, "%s0x%08x", (printed++ ? delim : ""),
					    (unsigned int)p->perf_counter[i]);
			else
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->perf_counter[i]);
		} else if (pp->format == FORMAT_DELTA) {
			if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns)
				outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->perf_counter[i]);
			else
				outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->perf_counter[i]);
		} else if (pp->format == FORMAT_PERCENT) {
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->perf_counter[i] / tsc);
		} else if (pp->type == COUNTER_K2M) {
			outp +=
			    sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->perf_counter[i] / 1000);
		}
	}

	/* Added PMT counters, package scope */
	for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) {
		const unsigned long value_raw = p->pmt_counter[i];
		double value_converted;
		switch (ppmt->type) {
		case PMT_TYPE_RAW:
			if (pmt_counter_get_width(ppmt) <= 32)
				outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""),
						(unsigned int)p->pmt_counter[i]);
			else
				outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->pmt_counter[i]);

			break;

		case PMT_TYPE_XTAL_TIME:
			value_converted = 100.0 * value_raw / crystal_hz / interval_float;
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
			break;

		case PMT_TYPE_TCORE_CLOCK:
			value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float;
			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
		}
	}

	/* platform (PSYS) columns appear only on the summary row */
	if (DO_BIC(BIC_SysWatt) && (t == &average.threads))
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
				rapl_counter_get_value(&pplat_cnt->energy_psys, RAPL_UNIT_WATTS, interval_float));
	if (DO_BIC(BIC_Sys_J) && (t == &average.threads))
		outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
				rapl_counter_get_value(&pplat_cnt->energy_psys, RAPL_UNIT_JOULES, interval_float));

done:
	/* terminate the row with exactly one newline */
	if (*(outp - 1) != '\n')
		outp += sprintf(outp, "\n");

	return 0;
}
3348
flush_output_stdout(void)3349 void flush_output_stdout(void)
3350 {
3351 FILE *filep;
3352
3353 if (outf == stderr)
3354 filep = stdout;
3355 else
3356 filep = outf;
3357
3358 fputs(output_buffer, filep);
3359 fflush(filep);
3360
3361 outp = output_buffer;
3362 }
3363
flush_output_stderr(void)3364 void flush_output_stderr(void)
3365 {
3366 fputs(output_buffer, outf);
3367 fflush(outf);
3368 outp = output_buffer;
3369 }
3370
format_all_counters(struct thread_data * t,struct core_data * c,struct pkg_data * p)3371 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3372 {
3373 static int count;
3374
3375 if ((!count || (header_iterations && !(count % header_iterations))) || !summary_only)
3376 print_header("\t");
3377
3378 format_counters(&average.threads, &average.cores, &average.packages);
3379
3380 count++;
3381
3382 if (summary_only)
3383 return;
3384
3385 for_all_cpus(format_counters, t, c, p);
3386 }
3387
/*
 * DELTA_WRAP32(new, old): old = (new - old) modulo 2^32.
 * Shifting both operands up 32 bits before subtracting discards their
 * upper halves, so a 32-bit counter that wrapped between samples still
 * produces the correct delta.
 */
#define DELTA_WRAP32(new, old) \
	old = ((((unsigned long long)new << 32) - ((unsigned long long)old << 32)) >> 32);
3390
/*
 * delta_package() - compute "old = new - old" for all package-scope counters.
 *
 * Cumulative counters (C-state residencies, LPI, RAPL energy) become
 * interval deltas; instantaneous readings (temperature, frequencies)
 * are copied from the end-of-interval sample.  Always returns 0.
 */
int delta_package(struct pkg_data *new, struct pkg_data *old)
{
	int i;
	struct msr_counter *mp;
	struct perf_counter_info *pp;
	struct pmt_counter *ppmt;

	if (DO_BIC(BIC_Totl_c0))
		old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
	if (DO_BIC(BIC_Any_c0))
		old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
	if (DO_BIC(BIC_GFX_c0))
		old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
	if (DO_BIC(BIC_CPUGFX))
		old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;

	old->pc2 = new->pc2 - old->pc2;
	if (DO_BIC(BIC_Pkgpc3))
		old->pc3 = new->pc3 - old->pc3;
	if (DO_BIC(BIC_Pkgpc6))
		old->pc6 = new->pc6 - old->pc6;
	if (DO_BIC(BIC_Pkgpc7))
		old->pc7 = new->pc7 - old->pc7;
	old->pc8 = new->pc8 - old->pc8;
	old->pc9 = new->pc9 - old->pc9;
	old->pc10 = new->pc10 - old->pc10;
	old->die_c6 = new->die_c6 - old->die_c6;
	old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
	old->sys_lpi = new->sys_lpi - old->sys_lpi;
	/* temperature is a spot reading, not cumulative */
	old->pkg_temp_c = new->pkg_temp_c;

	/* flag an error when rc6 counter resets/wraps */
	if (old->gfx_rc6_ms > new->gfx_rc6_ms)
		old->gfx_rc6_ms = -1;
	else
		old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;

	/* frequencies are spot readings */
	old->uncore_mhz = new->uncore_mhz;
	old->gfx_mhz = new->gfx_mhz;
	old->gfx_act_mhz = new->gfx_act_mhz;

	/* flag an error when mc6 counter resets/wraps */
	if (old->sam_mc6_ms > new->sam_mc6_ms)
		old->sam_mc6_ms = -1;
	else
		old->sam_mc6_ms = new->sam_mc6_ms - old->sam_mc6_ms;

	old->sam_mhz = new->sam_mhz;
	old->sam_act_mhz = new->sam_act_mhz;

	/* RAPL counters: delta the raw values; scaling happens at print time */
	old->energy_pkg.raw_value = new->energy_pkg.raw_value - old->energy_pkg.raw_value;
	old->energy_cores.raw_value = new->energy_cores.raw_value - old->energy_cores.raw_value;
	old->energy_gfx.raw_value = new->energy_gfx.raw_value - old->energy_gfx.raw_value;
	old->energy_dram.raw_value = new->energy_dram.raw_value - old->energy_dram.raw_value;
	old->rapl_pkg_perf_status.raw_value = new->rapl_pkg_perf_status.raw_value - old->rapl_pkg_perf_status.raw_value;
	old->rapl_dram_perf_status.raw_value =
	    new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value;

	/* user-added MSR counters: RAW and AVERAGE formats keep the new sample */
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			old->counter[i] = new->counter[i];
		else if (mp->format == FORMAT_AVERAGE)
			old->counter[i] = new->counter[i];
		else
			old->counter[i] = new->counter[i] - old->counter[i];
	}

	/* user-added perf counters */
	for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) {
		if (pp->format == FORMAT_RAW)
			old->perf_counter[i] = new->perf_counter[i];
		else if (pp->format == FORMAT_AVERAGE)
			old->perf_counter[i] = new->perf_counter[i];
		else
			old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i];
	}

	/* user-added PMT counters */
	for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) {
		if (ppmt->format == FORMAT_RAW)
			old->pmt_counter[i] = new->pmt_counter[i];
		else
			old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i];
	}

	return 0;
}
3476
/*
 * delta_core() - compute "old = new - old" for all core-scope counters.
 *
 * Residency and throttle counts become interval deltas; the core
 * temperature is a spot reading and is copied.
 */
void delta_core(struct core_data *new, struct core_data *old)
{
	int i;
	struct msr_counter *mp;
	struct perf_counter_info *pp;
	struct pmt_counter *ppmt;

	old->c3 = new->c3 - old->c3;
	old->c6 = new->c6 - old->c6;
	old->c7 = new->c7 - old->c7;
	old->core_temp_c = new->core_temp_c;	/* spot reading */
	old->core_throt_cnt = new->core_throt_cnt - old->core_throt_cnt;
	old->mc6_us = new->mc6_us - old->mc6_us;

	/* per-core energy is delta'd modulo 2^32 to survive counter wrap */
	DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value);

	/* user-added MSR counters: RAW keeps the new sample, others are delta'd */
	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			old->counter[i] = new->counter[i];
		else
			old->counter[i] = new->counter[i] - old->counter[i];
	}

	/* user-added perf counters */
	for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) {
		if (pp->format == FORMAT_RAW)
			old->perf_counter[i] = new->perf_counter[i];
		else
			old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i];
	}

	/* user-added PMT counters */
	for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) {
		if (ppmt->format == FORMAT_RAW)
			old->pmt_counter[i] = new->pmt_counter[i];
		else
			old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i];
	}
}
3514
soft_c1_residency_display(int bic)3515 int soft_c1_residency_display(int bic)
3516 {
3517 if (!DO_BIC(BIC_CPU_c1) || platform->has_msr_core_c1_res)
3518 return 0;
3519
3520 return DO_BIC_READ(bic);
3521 }
3522
3523 /*
3524 * old = new - old
3525 */
delta_thread(struct thread_data * new,struct thread_data * old,struct core_data * core_delta)3526 int delta_thread(struct thread_data *new, struct thread_data *old, struct core_data *core_delta)
3527 {
3528 int i;
3529 struct msr_counter *mp;
3530 struct perf_counter_info *pp;
3531 struct pmt_counter *ppmt;
3532
3533 /* we run cpuid just the 1st time, copy the results */
3534 if (DO_BIC(BIC_APIC))
3535 new->apic_id = old->apic_id;
3536 if (DO_BIC(BIC_X2APIC))
3537 new->x2apic_id = old->x2apic_id;
3538
3539 /*
3540 * the timestamps from start of measurement interval are in "old"
3541 * the timestamp from end of measurement interval are in "new"
3542 * over-write old w/ new so we can print end of interval values
3543 */
3544
3545 timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta);
3546 old->tv_begin = new->tv_begin;
3547 old->tv_end = new->tv_end;
3548
3549 old->tsc = new->tsc - old->tsc;
3550
3551 /* check for TSC < 1 Mcycles over interval */
3552 if (old->tsc < (1000 * 1000))
3553 errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
3554 "You can disable all c-states by booting with \"idle=poll\"\n"
3555 "or just the deep ones with \"processor.max_cstate=1\"");
3556
3557 old->c1 = new->c1 - old->c1;
3558
3559 if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
3560 || soft_c1_residency_display(BIC_Avg_MHz)) {
3561 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
3562 old->aperf = new->aperf - old->aperf;
3563 old->mperf = new->mperf - old->mperf;
3564 } else {
3565 return -1;
3566 }
3567 }
3568
3569 if (platform->has_msr_core_c1_res) {
3570 /*
3571 * Some models have a dedicated C1 residency MSR,
3572 * which should be more accurate than the derivation below.
3573 */
3574 } else {
3575 /*
3576 * As counter collection is not atomic,
3577 * it is possible for mperf's non-halted cycles + idle states
3578 * to exceed TSC's all cycles: show c1 = 0% in that case.
3579 */
3580 if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
3581 old->c1 = 0;
3582 else {
3583 /* normal case, derive c1 */
3584 old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
3585 - core_delta->c6 - core_delta->c7;
3586 }
3587 }
3588
3589 if (old->mperf == 0) {
3590 if (debug > 1)
3591 fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
3592 old->mperf = 1; /* divide by 0 protection */
3593 }
3594
3595 if (DO_BIC(BIC_IPC))
3596 old->instr_count = new->instr_count - old->instr_count;
3597
3598 if (DO_BIC(BIC_IRQ))
3599 old->irq_count = new->irq_count - old->irq_count;
3600
3601 if (DO_BIC(BIC_NMI))
3602 old->nmi_count = new->nmi_count - old->nmi_count;
3603
3604 if (DO_BIC(BIC_SMI))
3605 old->smi_count = new->smi_count - old->smi_count;
3606
3607 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
3608 if (mp->format == FORMAT_RAW)
3609 old->counter[i] = new->counter[i];
3610 else
3611 old->counter[i] = new->counter[i] - old->counter[i];
3612 }
3613
3614 for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) {
3615 if (pp->format == FORMAT_RAW)
3616 old->perf_counter[i] = new->perf_counter[i];
3617 else
3618 old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i];
3619 }
3620
3621 for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) {
3622 if (ppmt->format == FORMAT_RAW)
3623 old->pmt_counter[i] = new->pmt_counter[i];
3624 else
3625 old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i];
3626 }
3627
3628 return 0;
3629 }
3630
/*
 * Compute "old = new - old" deltas for one CPU, cascading to its core
 * and package when this CPU is the first thread/core of each.
 * Returns non-zero if any stage reports a problem.
 */
int delta_cpu(struct thread_data *t, struct core_data *c,
	      struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2)
{
	int rc;

	/* core counters are shared: only the core's first thread computes them */
	if (is_cpu_first_thread_in_core(t, c, p))
		delta_core(c, c2);

	/* every CPU owns its own thread counters */
	rc = delta_thread(t, t2, c2);	/* c2 now holds the core delta */

	/* package counters: only the package's first core computes them */
	if (is_cpu_first_core_in_package(t, c, p))
		rc |= delta_package(p, p2);

	return rc;
}
3649
delta_platform(struct platform_counters * new,struct platform_counters * old)3650 void delta_platform(struct platform_counters *new, struct platform_counters *old)
3651 {
3652 old->energy_psys.raw_value = new->energy_psys.raw_value - old->energy_psys.raw_value;
3653 }
3654
rapl_counter_clear(struct rapl_counter * c)3655 void rapl_counter_clear(struct rapl_counter *c)
3656 {
3657 c->raw_value = 0;
3658 c->scale = 0.0;
3659 c->unit = RAPL_UNIT_INVALID;
3660 }
3661
clear_counters(struct thread_data * t,struct core_data * c,struct pkg_data * p)3662 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
3663 {
3664 int i;
3665 struct msr_counter *mp;
3666
3667 t->tv_begin.tv_sec = 0;
3668 t->tv_begin.tv_usec = 0;
3669 t->tv_end.tv_sec = 0;
3670 t->tv_end.tv_usec = 0;
3671 t->tv_delta.tv_sec = 0;
3672 t->tv_delta.tv_usec = 0;
3673
3674 t->tsc = 0;
3675 t->aperf = 0;
3676 t->mperf = 0;
3677 t->c1 = 0;
3678
3679 t->instr_count = 0;
3680
3681 t->irq_count = 0;
3682 t->nmi_count = 0;
3683 t->smi_count = 0;
3684
3685 c->c3 = 0;
3686 c->c6 = 0;
3687 c->c7 = 0;
3688 c->mc6_us = 0;
3689 c->core_temp_c = 0;
3690 rapl_counter_clear(&c->core_energy);
3691 c->core_throt_cnt = 0;
3692
3693 p->pkg_wtd_core_c0 = 0;
3694 p->pkg_any_core_c0 = 0;
3695 p->pkg_any_gfxe_c0 = 0;
3696 p->pkg_both_core_gfxe_c0 = 0;
3697
3698 p->pc2 = 0;
3699 if (DO_BIC(BIC_Pkgpc3))
3700 p->pc3 = 0;
3701 if (DO_BIC(BIC_Pkgpc6))
3702 p->pc6 = 0;
3703 if (DO_BIC(BIC_Pkgpc7))
3704 p->pc7 = 0;
3705 p->pc8 = 0;
3706 p->pc9 = 0;
3707 p->pc10 = 0;
3708 p->die_c6 = 0;
3709 p->cpu_lpi = 0;
3710 p->sys_lpi = 0;
3711
3712 rapl_counter_clear(&p->energy_pkg);
3713 rapl_counter_clear(&p->energy_dram);
3714 rapl_counter_clear(&p->energy_cores);
3715 rapl_counter_clear(&p->energy_gfx);
3716 rapl_counter_clear(&p->rapl_pkg_perf_status);
3717 rapl_counter_clear(&p->rapl_dram_perf_status);
3718 p->pkg_temp_c = 0;
3719
3720 p->gfx_rc6_ms = 0;
3721 p->uncore_mhz = 0;
3722 p->gfx_mhz = 0;
3723 p->gfx_act_mhz = 0;
3724 p->sam_mc6_ms = 0;
3725 p->sam_mhz = 0;
3726 p->sam_act_mhz = 0;
3727 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
3728 t->counter[i] = 0;
3729
3730 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
3731 c->counter[i] = 0;
3732
3733 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
3734 p->counter[i] = 0;
3735
3736 memset(&t->perf_counter[0], 0, sizeof(t->perf_counter));
3737 memset(&c->perf_counter[0], 0, sizeof(c->perf_counter));
3738 memset(&p->perf_counter[0], 0, sizeof(p->perf_counter));
3739
3740 memset(&t->pmt_counter[0], 0, ARRAY_SIZE(t->pmt_counter));
3741 memset(&c->pmt_counter[0], 0, ARRAY_SIZE(c->pmt_counter));
3742 memset(&p->pmt_counter[0], 0, ARRAY_SIZE(p->pmt_counter));
3743 }
3744
rapl_counter_accumulate(struct rapl_counter * dst,const struct rapl_counter * src)3745 void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter *src)
3746 {
3747 /* Copy unit and scale from src if dst is not initialized */
3748 if (dst->unit == RAPL_UNIT_INVALID) {
3749 dst->unit = src->unit;
3750 dst->scale = src->scale;
3751 }
3752
3753 assert(dst->unit == src->unit);
3754 assert(dst->scale == src->scale);
3755
3756 dst->raw_value += src->raw_value;
3757 }
3758
/*
 * sum_counters()
 * Accumulate one CPU's counters into the global "average" accumulators.
 * Thread-scope values are summed for every CPU; core-scope values only
 * for the first thread in each core; package-scope values only for the
 * first core in each package.  Always returns 0 (for_all_cpus callback).
 */
int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	int i;
	struct msr_counter *mp;
	struct perf_counter_info *pp;
	struct pmt_counter *ppmt;

	/* copy un-changing apic_id's */
	if (DO_BIC(BIC_APIC))
		average.threads.apic_id = t->apic_id;
	if (DO_BIC(BIC_X2APIC))
		average.threads.x2apic_id = t->x2apic_id;

	/* remember first tv_begin */
	if (average.threads.tv_begin.tv_sec == 0)
		average.threads.tv_begin = procsysfs_tv_begin;

	/* remember last tv_end */
	average.threads.tv_end = t->tv_end;

	average.threads.tsc += t->tsc;
	average.threads.aperf += t->aperf;
	average.threads.mperf += t->mperf;
	average.threads.c1 += t->c1;

	average.threads.instr_count += t->instr_count;

	average.threads.irq_count += t->irq_count;
	average.threads.nmi_count += t->nmi_count;
	average.threads.smi_count += t->smi_count;

	/* RAW-format counters are snapshots, not rates: skip when summing */
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
		average.threads.counter[i] += t->counter[i];
	}

	for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) {
		if (pp->format == FORMAT_RAW)
			continue;
		average.threads.perf_counter[i] += t->perf_counter[i];
	}

	/* NOTE(review): PMT counters are summed regardless of format here
	 * and below — confirm RAW PMT counters are intended to be summed */
	for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) {
		average.threads.pmt_counter[i] += t->pmt_counter[i];
	}

	/* sum per-core values only for 1st thread in core */
	if (!is_cpu_first_thread_in_core(t, c, p))
		return 0;

	average.cores.c3 += c->c3;
	average.cores.c6 += c->c6;
	average.cores.c7 += c->c7;
	average.cores.mc6_us += c->mc6_us;

	/* temperatures and throttle counts track the maximum, not a sum */
	average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
	average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt);

	rapl_counter_accumulate(&average.cores.core_energy, &c->core_energy);

	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
		average.cores.counter[i] += c->counter[i];
	}

	for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) {
		if (pp->format == FORMAT_RAW)
			continue;
		average.cores.perf_counter[i] += c->perf_counter[i];
	}

	for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) {
		average.cores.pmt_counter[i] += c->pmt_counter[i];
	}

	/* sum per-pkg values only for 1st core in pkg */
	if (!is_cpu_first_core_in_package(t, c, p))
		return 0;

	if (DO_BIC(BIC_Totl_c0))
		average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
	if (DO_BIC(BIC_Any_c0))
		average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
	if (DO_BIC(BIC_GFX_c0))
		average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
	if (DO_BIC(BIC_CPUGFX))
		average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;

	average.packages.pc2 += p->pc2;
	if (DO_BIC(BIC_Pkgpc3))
		average.packages.pc3 += p->pc3;
	if (DO_BIC(BIC_Pkgpc6))
		average.packages.pc6 += p->pc6;
	if (DO_BIC(BIC_Pkgpc7))
		average.packages.pc7 += p->pc7;
	average.packages.pc8 += p->pc8;
	average.packages.pc9 += p->pc9;
	average.packages.pc10 += p->pc10;
	average.packages.die_c6 += p->die_c6;

	/* LPI residencies are system-wide: copy, do not sum */
	average.packages.cpu_lpi = p->cpu_lpi;
	average.packages.sys_lpi = p->sys_lpi;

	rapl_counter_accumulate(&average.packages.energy_pkg, &p->energy_pkg);
	rapl_counter_accumulate(&average.packages.energy_dram, &p->energy_dram);
	rapl_counter_accumulate(&average.packages.energy_cores, &p->energy_cores);
	rapl_counter_accumulate(&average.packages.energy_gfx, &p->energy_gfx);

	/* graphics/SA-media values: last package's snapshot wins */
	average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
	average.packages.uncore_mhz = p->uncore_mhz;
	average.packages.gfx_mhz = p->gfx_mhz;
	average.packages.gfx_act_mhz = p->gfx_act_mhz;
	average.packages.sam_mc6_ms = p->sam_mc6_ms;
	average.packages.sam_mhz = p->sam_mhz;
	average.packages.sam_act_mhz = p->sam_act_mhz;

	average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);

	rapl_counter_accumulate(&average.packages.rapl_pkg_perf_status, &p->rapl_pkg_perf_status);
	rapl_counter_accumulate(&average.packages.rapl_dram_perf_status, &p->rapl_dram_perf_status);

	/* with topo.num_packages == 0, RAW counters are copied, not summed */
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		if ((mp->format == FORMAT_RAW) && (topo.num_packages == 0))
			average.packages.counter[i] = p->counter[i];
		else
			average.packages.counter[i] += p->counter[i];
	}

	for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) {
		if ((pp->format == FORMAT_RAW) && (topo.num_packages == 0))
			average.packages.perf_counter[i] = p->perf_counter[i];
		else
			average.packages.perf_counter[i] += p->perf_counter[i];
	}

	for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) {
		average.packages.pmt_counter[i] += p->pmt_counter[i];
	}

	return 0;
}
3902
3903 /*
3904 * sum the counters for all cpus in the system
3905 * compute the weighted average
3906 */
/*
 * compute_average()
 * Sum counters across all CPUs (via sum_counters) into the global
 * "average" accumulators, then divide by the number of allowed CPUs /
 * cores / packages to produce weighted averages.
 */
void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	int i;
	struct msr_counter *mp;
	struct perf_counter_info *pp;
	struct pmt_counter *ppmt;

	clear_counters(&average.threads, &average.cores, &average.packages);

	for_all_cpus(sum_counters, t, c, p);

	/* Use the global time delta for the average. */
	average.threads.tv_delta = tv_delta;

	average.threads.tsc /= topo.allowed_cpus;
	average.threads.aperf /= topo.allowed_cpus;
	average.threads.mperf /= topo.allowed_cpus;
	average.threads.instr_count /= topo.allowed_cpus;
	average.threads.c1 /= topo.allowed_cpus;

	/* widen output columns once any sum exceeds 7 digits */
	if (average.threads.irq_count > 9999999)
		sums_need_wide_columns = 1;
	if (average.threads.nmi_count > 9999999)
		sums_need_wide_columns = 1;


	average.cores.c3 /= topo.allowed_cores;
	average.cores.c6 /= topo.allowed_cores;
	average.cores.c7 /= topo.allowed_cores;
	average.cores.mc6_us /= topo.allowed_cores;

	if (DO_BIC(BIC_Totl_c0))
		average.packages.pkg_wtd_core_c0 /= topo.allowed_packages;
	if (DO_BIC(BIC_Any_c0))
		average.packages.pkg_any_core_c0 /= topo.allowed_packages;
	if (DO_BIC(BIC_GFX_c0))
		average.packages.pkg_any_gfxe_c0 /= topo.allowed_packages;
	if (DO_BIC(BIC_CPUGFX))
		average.packages.pkg_both_core_gfxe_c0 /= topo.allowed_packages;

	average.packages.pc2 /= topo.allowed_packages;
	if (DO_BIC(BIC_Pkgpc3))
		average.packages.pc3 /= topo.allowed_packages;
	if (DO_BIC(BIC_Pkgpc6))
		average.packages.pc6 /= topo.allowed_packages;
	if (DO_BIC(BIC_Pkgpc7))
		average.packages.pc7 /= topo.allowed_packages;

	average.packages.pc8 /= topo.allowed_packages;
	average.packages.pc9 /= topo.allowed_packages;
	average.packages.pc10 /= topo.allowed_packages;
	average.packages.die_c6 /= topo.allowed_packages;

	/* RAW counters are never averaged; ITEMS counters only widen columns */
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
		if (mp->type == COUNTER_ITEMS) {
			if (average.threads.counter[i] > 9999999)
				sums_need_wide_columns = 1;
			continue;
		}
		average.threads.counter[i] /= topo.allowed_cpus;
	}
	/* NOTE(review): unlike the threads loop above, the core/package loops
	 * below have no "continue" after the ITEMS check, so ITEMS counters
	 * are divided as well — confirm this asymmetry is intended */
	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
		if (mp->type == COUNTER_ITEMS) {
			if (average.cores.counter[i] > 9999999)
				sums_need_wide_columns = 1;
		}
		average.cores.counter[i] /= topo.allowed_cores;
	}
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		if (mp->format == FORMAT_RAW)
			continue;
		if (mp->type == COUNTER_ITEMS) {
			if (average.packages.counter[i] > 9999999)
				sums_need_wide_columns = 1;
		}
		average.packages.counter[i] /= topo.allowed_packages;
	}

	for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) {
		if (pp->format == FORMAT_RAW)
			continue;
		if (pp->type == COUNTER_ITEMS) {
			if (average.threads.perf_counter[i] > 9999999)
				sums_need_wide_columns = 1;
			continue;
		}
		average.threads.perf_counter[i] /= topo.allowed_cpus;
	}
	for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) {
		if (pp->format == FORMAT_RAW)
			continue;
		if (pp->type == COUNTER_ITEMS) {
			if (average.cores.perf_counter[i] > 9999999)
				sums_need_wide_columns = 1;
		}
		average.cores.perf_counter[i] /= topo.allowed_cores;
	}
	for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) {
		if (pp->format == FORMAT_RAW)
			continue;
		if (pp->type == COUNTER_ITEMS) {
			if (average.packages.perf_counter[i] > 9999999)
				sums_need_wide_columns = 1;
		}
		average.packages.perf_counter[i] /= topo.allowed_packages;
	}

	/* PMT counters are always averaged, regardless of format/type */
	for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) {
		average.threads.pmt_counter[i] /= topo.allowed_cpus;
	}
	for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) {
		average.cores.pmt_counter[i] /= topo.allowed_cores;
	}
	for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) {
		average.packages.pmt_counter[i] /= topo.allowed_packages;
	}
}
4028
rdtsc(void)4029 static unsigned long long rdtsc(void)
4030 {
4031 unsigned int low, high;
4032
4033 asm volatile ("rdtsc":"=a" (low), "=d"(high));
4034
4035 return low | ((unsigned long long)high) << 32;
4036 }
4037
4038 /*
4039 * Open a file, and exit on failure
4040 */
/*
 * Open a file, and exit on failure
 */
FILE *fopen_or_die(const char *path, const char *mode)
{
	FILE *fp;

	fp = fopen(path, mode);
	if (fp == NULL)
		err(1, "%s: open failed", path);

	return fp;
}
4049
4050 /*
4051 * snapshot_sysfs_counter()
4052 *
4053 * return snapshot of given counter
4054 */
/*
 * snapshot_sysfs_counter()
 * Read one unsigned decimal value from the given sysfs file.
 * Exits via err() if the file cannot be opened or parsed.
 */
unsigned long long snapshot_sysfs_counter(char *path)
{
	FILE *fp;
	int retval;
	unsigned long long counter;

	fp = fopen_or_die(path, "r");

	/* Fix: "%llu" matches the unsigned long long destination;
	 * "%lld" (signed) was a conversion/type mismatch (UB per C99). */
	retval = fscanf(fp, "%llu", &counter);
	if (retval != 1)
		err(1, "snapshot_sysfs_counter(%s)", path);

	fclose(fp);

	return counter;
}
4071
get_mp(int cpu,struct msr_counter * mp,unsigned long long * counterp,char * counter_path)4072 int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp, char *counter_path)
4073 {
4074 if (mp->msr_num != 0) {
4075 assert(!no_msr);
4076 if (get_msr(cpu, mp->msr_num, counterp))
4077 return -1;
4078 } else {
4079 char path[128 + PATH_BYTES];
4080
4081 if (mp->flags & SYSFS_PERCPU) {
4082 sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", cpu, mp->sp->path);
4083
4084 *counterp = snapshot_sysfs_counter(path);
4085 } else {
4086 *counterp = snapshot_sysfs_counter(counter_path);
4087 }
4088 }
4089
4090 return 0;
4091 }
4092
get_legacy_uncore_mhz(int package)4093 unsigned long long get_legacy_uncore_mhz(int package)
4094 {
4095 char path[128];
4096 int die;
4097 static int warn_once;
4098
4099 /*
4100 * for this package, use the first die_id that exists
4101 */
4102 for (die = 0; die <= topo.max_die_id; ++die) {
4103
4104 sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz",
4105 package, die);
4106
4107 if (access(path, R_OK) == 0)
4108 return (snapshot_sysfs_counter(path) / 1000);
4109 }
4110 if (!warn_once) {
4111 warnx("BUG: %s: No %s", __func__, path);
4112 warn_once = 1;
4113 }
4114
4115 return 0;
4116 }
4117
get_epb(int cpu)4118 int get_epb(int cpu)
4119 {
4120 char path[128 + PATH_BYTES];
4121 unsigned long long msr;
4122 int ret, epb = -1;
4123 FILE *fp;
4124
4125 sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu);
4126
4127 fp = fopen(path, "r");
4128 if (!fp)
4129 goto msr_fallback;
4130
4131 ret = fscanf(fp, "%d", &epb);
4132 if (ret != 1)
4133 err(1, "%s(%s)", __func__, path);
4134
4135 fclose(fp);
4136
4137 return epb;
4138
4139 msr_fallback:
4140 if (no_msr)
4141 return -1;
4142
4143 get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
4144
4145 return msr & 0xf;
4146 }
4147
/*
 * get_apic_id()
 * Populate t->apic_id (CPUID leaf 1) and/or t->x2apic_id (Intel leaf
 * 0xb, or AMD/Hygon leaf 0x8000001e), depending on which columns are
 * enabled and what the CPU supports.
 */
void get_apic_id(struct thread_data *t)
{
	unsigned int eax, ebx, ecx, edx;

	if (DO_BIC(BIC_APIC)) {
		eax = ebx = ecx = edx = 0;
		__cpuid(1, eax, ebx, ecx, edx);

		/* initial (8-bit) APIC ID is in EBX[31:24] of leaf 1 */
		t->apic_id = (ebx >> 24) & 0xff;
	}

	if (!DO_BIC(BIC_X2APIC))
		return;

	if (authentic_amd || hygon_genuine) {
		unsigned int topology_extensions;

		/* leaf 0x8000001e requires this extended level */
		if (max_extended_level < 0x8000001e)
			return;

		eax = ebx = ecx = edx = 0;
		__cpuid(0x80000001, eax, ebx, ecx, edx);
		/* ECX bit 22: topology extensions supported */
		topology_extensions = ecx & (1 << 22);

		if (topology_extensions == 0)
			return;

		eax = ebx = ecx = edx = 0;
		__cpuid(0x8000001e, eax, ebx, ecx, edx);

		/* extended APIC ID is returned in EAX of leaf 0x8000001e */
		t->x2apic_id = eax;
		return;
	}

	if (!genuine_intel)
		return;

	/* Intel x2APIC ID requires leaf 0xb */
	if (max_level < 0xb)
		return;

	ecx = 0;
	__cpuid(0xb, eax, ebx, ecx, edx);
	/* leaf 0xb: EDX holds the full 32-bit x2APIC ID */
	t->x2apic_id = edx;

	/* low byte of x2APIC ID should match the legacy APIC ID */
	if (debug && (t->apic_id != (t->x2apic_id & 0xff)))
		fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id);
}
4195
get_core_throt_cnt(int cpu,unsigned long long * cnt)4196 int get_core_throt_cnt(int cpu, unsigned long long *cnt)
4197 {
4198 char path[128 + PATH_BYTES];
4199 unsigned long long tmp;
4200 FILE *fp;
4201 int ret;
4202
4203 sprintf(path, "/sys/devices/system/cpu/cpu%d/thermal_throttle/core_throttle_count", cpu);
4204 fp = fopen(path, "r");
4205 if (!fp)
4206 return -1;
4207 ret = fscanf(fp, "%lld", &tmp);
4208 fclose(fp);
4209 if (ret != 1)
4210 return -1;
4211 *cnt = tmp;
4212
4213 return 0;
4214 }
4215
/*
 * File descriptors for reading APERF/MPERF as one perf event group.
 * NOTE(review): not referenced anywhere in this part of the file —
 * possibly a legacy leftover; confirm before removing.
 */
struct amperf_group_fd {
	int aperf;		/* Also the group descriptor */
	int mperf;
};
4220
read_perf_counter_info(const char * const path,const char * const parse_format,void * value_ptr)4221 static int read_perf_counter_info(const char *const path, const char *const parse_format, void *value_ptr)
4222 {
4223 int fdmt;
4224 int bytes_read;
4225 char buf[64];
4226 int ret = -1;
4227
4228 fdmt = open(path, O_RDONLY, 0);
4229 if (fdmt == -1) {
4230 if (debug)
4231 fprintf(stderr, "Failed to parse perf counter info %s\n", path);
4232 ret = -1;
4233 goto cleanup_and_exit;
4234 }
4235
4236 bytes_read = read(fdmt, buf, sizeof(buf) - 1);
4237 if (bytes_read <= 0 || bytes_read >= (int)sizeof(buf)) {
4238 if (debug)
4239 fprintf(stderr, "Failed to parse perf counter info %s\n", path);
4240 ret = -1;
4241 goto cleanup_and_exit;
4242 }
4243
4244 buf[bytes_read] = '\0';
4245
4246 if (sscanf(buf, parse_format, value_ptr) != 1) {
4247 if (debug)
4248 fprintf(stderr, "Failed to parse perf counter info %s\n", path);
4249 ret = -1;
4250 goto cleanup_and_exit;
4251 }
4252
4253 ret = 0;
4254
4255 cleanup_and_exit:
4256 close(fdmt);
4257 return ret;
4258 }
4259
/*
 * Scan an unsigned int out of the file at 'path'; returns (unsigned)-1
 * on failure.
 */
static unsigned int read_perf_counter_info_n(const char *const path, const char *const parse_format)
{
	unsigned int value = -1;

	if (read_perf_counter_info(path, parse_format, &value))
		value = -1;

	return value;
}
4271
/*
 * Return the perf event-source type id for 'subsys'
 * (from /sys/bus/event_source/devices/<subsys>/type), or (unsigned)-1.
 */
static unsigned int read_perf_type(const char *subsys)
{
	char path[128];

	snprintf(path, sizeof(path), "/sys/bus/event_source/devices/%s/type", subsys);

	return read_perf_counter_info_n(path, "%u");
}
4282
/*
 * read_perf_config()
 * Parse /sys/bus/event_source/devices/<subsys>/events/<event_name>,
 * which holds a comma-separated list such as "event=0x3c,umask=0x00",
 * and fold it into a perf config value: (umask << 8) | event.
 * Returns (unsigned int)-1 on any failure or if no "event=" was found.
 */
static unsigned int read_perf_config(const char *subsys, const char *event_name)
{
	const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s";
	FILE *fconfig = NULL;
	char path[128];
	char config_str[64];
	unsigned int config;
	unsigned int umask;
	bool has_config = false;
	bool has_umask = false;
	unsigned int ret = -1;

	snprintf(path, sizeof(path), path_format, subsys, event_name);

	fconfig = fopen(path, "r");
	if (!fconfig)
		return -1;

	if (fgets(config_str, ARRAY_SIZE(config_str), fconfig) != config_str)
		goto cleanup_and_exit;

	/* walk the comma-separated "key=value" tokens in place */
	for (char *pconfig_str = &config_str[0]; pconfig_str;) {
		if (sscanf(pconfig_str, "event=%x", &config) == 1) {
			has_config = true;
			goto next;
		}

		if (sscanf(pconfig_str, "umask=%x", &umask) == 1) {
			has_umask = true;
			goto next;
		}

next:
		/* split at the next comma and advance past it */
		pconfig_str = strchr(pconfig_str, ',');
		if (pconfig_str) {
			*pconfig_str = '\0';
			++pconfig_str;
		}
	}

	/* umask is optional; event is mandatory */
	if (!has_umask)
		umask = 0;

	if (has_config)
		ret = (umask << 8) | config;

cleanup_and_exit:
	fclose(fconfig);
	return ret;
}
4333
read_perf_rapl_unit(const char * subsys,const char * event_name)4334 static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_name)
4335 {
4336 const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.unit";
4337 const char *const format = "%s";
4338 char path[128];
4339 char unit_buffer[16];
4340
4341 snprintf(path, sizeof(path), path_format, subsys, event_name);
4342
4343 read_perf_counter_info(path, format, &unit_buffer);
4344 if (strcmp("Joules", unit_buffer) == 0)
4345 return RAPL_UNIT_JOULES;
4346
4347 return RAPL_UNIT_INVALID;
4348 }
4349
/*
 * Return the perf counter scale factor from ".../events/<event>.scale",
 * or 0.0 when it cannot be read.
 */
static double read_perf_scale(const char *subsys, const char *event_name)
{
	double scale;
	char path[128];

	snprintf(path, sizeof(path), "/sys/bus/event_source/devices/%s/events/%s.scale", subsys, event_name);

	if (read_perf_counter_info(path, "%lf", &scale))
		return 0.0;

	return scale;
}
4364
rapl_counter_info_count_perf(const struct rapl_counter_info_t * rci)4365 size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t *rci)
4366 {
4367 size_t ret = 0;
4368
4369 for (int i = 0; i < NUM_RAPL_COUNTERS; ++i)
4370 if (rci->source[i] == COUNTER_SOURCE_PERF)
4371 ++ret;
4372
4373 return ret;
4374 }
4375
cstate_counter_info_count_perf(const struct cstate_counter_info_t * cci)4376 static size_t cstate_counter_info_count_perf(const struct cstate_counter_info_t *cci)
4377 {
4378 size_t ret = 0;
4379
4380 for (int i = 0; i < NUM_CSTATE_COUNTERS; ++i)
4381 if (cci->source[i] == COUNTER_SOURCE_PERF)
4382 ++ret;
4383
4384 return ret;
4385 }
4386
write_rapl_counter(struct rapl_counter * rc,struct rapl_counter_info_t * rci,unsigned int idx)4387 void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci, unsigned int idx)
4388 {
4389 if (rci->source[idx] == COUNTER_SOURCE_NONE)
4390 return;
4391
4392 rc->raw_value = rci->data[idx];
4393 rc->unit = rci->unit[idx];
4394 rc->scale = rci->scale[idx];
4395 }
4396
/*
 * get_rapl_counters()
 * Snapshot all RAPL counters for one domain (bulk perf group read and/or
 * individual MSR reads) and fan them out into the core/package/platform
 * output structs.  Returns 0 on success, negative on MSR read failure.
 */
int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct pkg_data *p)
{
	/* psys accumulates into alternating even/odd platform snapshots */
	struct platform_counters *pplat_cnt = p == package_odd ? &platform_counters_odd : &platform_counters_even;
	unsigned long long perf_data[NUM_RAPL_COUNTERS + 1];
	struct rapl_counter_info_t *rci;

	if (debug >= 2)
		fprintf(stderr, "%s: cpu%d domain%d\n", __func__, cpu, domain);

	assert(rapl_counter_info_perdomain);
	assert(domain < rapl_counter_info_perdomain_size);

	rci = &rapl_counter_info_perdomain[domain];

	/*
	 * If we have any perf counters to read, read them all now, in bulk
	 */
	if (rci->fd_perf != -1) {
		size_t num_perf_counters = rapl_counter_info_count_perf(rci);
		/* +1: a perf group read is prefixed with the counter count */
		const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long);
		const ssize_t actual_read_size = read(rci->fd_perf, &perf_data[0], sizeof(perf_data));

		/* NOTE(review): "%zu" with ssize_t arguments — "%zd" would
		 * match the signed type; confirm */
		if (actual_read_size != expected_read_size)
			err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size,
			    actual_read_size);
	}

	/* pi indexes perf_data, starting at 1 to skip the count header */
	for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) {
		switch (rci->source[i]) {
		case COUNTER_SOURCE_NONE:
			rci->data[i] = 0;
			break;

		case COUNTER_SOURCE_PERF:
			assert(pi < ARRAY_SIZE(perf_data));
			assert(rci->fd_perf != -1);

			if (debug >= 2)
				fprintf(stderr, "Reading rapl counter via perf at %u (%llu %e %lf)\n",
					i, perf_data[pi], rci->scale[i], perf_data[pi] * rci->scale[i]);

			rci->data[i] = perf_data[pi];

			++pi;
			break;

		case COUNTER_SOURCE_MSR:
			if (debug >= 2)
				fprintf(stderr, "Reading rapl counter via msr at %u\n", i);

			assert(!no_msr);
			if (rci->flags[i] & RAPL_COUNTER_FLAG_USE_MSR_SUM) {
				/* read via the accumulated MSR sum —
				 * presumably to survive wraparound; confirm */
				if (get_msr_sum(cpu, rci->msr[i], &rci->data[i]))
					return -13 - i;
			} else {
				if (get_msr(cpu, rci->msr[i], &rci->data[i]))
					return -13 - i;
			}

			/* mask and shift the raw MSR value into position */
			rci->data[i] &= rci->msr_mask[i];
			if (rci->msr_shift[i] >= 0)
				rci->data[i] >>= abs(rci->msr_shift[i]);
			else
				rci->data[i] <<= abs(rci->msr_shift[i]);

			break;
		}
	}

	/* fan the unified snapshot out to the per-scope output structs */
	BUILD_BUG_ON(NUM_RAPL_COUNTERS != 8);
	write_rapl_counter(&p->energy_pkg, rci, RAPL_RCI_INDEX_ENERGY_PKG);
	write_rapl_counter(&p->energy_cores, rci, RAPL_RCI_INDEX_ENERGY_CORES);
	write_rapl_counter(&p->energy_dram, rci, RAPL_RCI_INDEX_DRAM);
	write_rapl_counter(&p->energy_gfx, rci, RAPL_RCI_INDEX_GFX);
	write_rapl_counter(&p->rapl_pkg_perf_status, rci, RAPL_RCI_INDEX_PKG_PERF_STATUS);
	write_rapl_counter(&p->rapl_dram_perf_status, rci, RAPL_RCI_INDEX_DRAM_PERF_STATUS);
	write_rapl_counter(&c->core_energy, rci, RAPL_RCI_INDEX_CORE_ENERGY);
	write_rapl_counter(&pplat_cnt->energy_psys, rci, RAPL_RCI_INDEX_ENERGY_PLATFORM);

	return 0;
}
4478
find_sysfs_path_by_id(struct sysfs_path * sp,int id)4479 char *find_sysfs_path_by_id(struct sysfs_path *sp, int id)
4480 {
4481 while (sp) {
4482 if (sp->id == id)
4483 return (sp->path);
4484 sp = sp->next;
4485 }
4486 if (debug)
4487 warnx("%s: id%d not found", __func__, id);
4488 return NULL;
4489 }
4490
/*
 * get_cstate_counters()
 * Snapshot all C-state residency counters for one cpu: bulk-read the
 * core-scope and package-scope perf groups, merge with per-MSR reads,
 * and store into the thread/core/package structs.  Returns 0 on
 * success, negative on MSR read failure.
 */
int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	/*
	 * Overcommit memory a little bit here,
	 * but skip calculating exact sizes for the buffers.
	 */
	unsigned long long perf_data[NUM_CSTATE_COUNTERS];
	unsigned long long perf_data_core[NUM_CSTATE_COUNTERS + 1];
	unsigned long long perf_data_pkg[NUM_CSTATE_COUNTERS + 1];

	struct cstate_counter_info_t *cci;

	if (debug >= 2)
		fprintf(stderr, "%s: cpu%d\n", __func__, cpu);

	assert(ccstate_counter_info);
	/* NOTE(review): "<=" permits cpu == size (one past the end) —
	 * confirm the array really has size+1 entries or this is off-by-one */
	assert(cpu <= ccstate_counter_info_size);

	ZERO_ARRAY(perf_data);
	ZERO_ARRAY(perf_data_core);
	ZERO_ARRAY(perf_data_pkg);

	cci = &ccstate_counter_info[cpu];

	/*
	 * If we have any perf counters to read, read them all now, in bulk
	 */
	const size_t num_perf_counters = cstate_counter_info_count_perf(cci);
	ssize_t expected_read_size = num_perf_counters * sizeof(unsigned long long);
	ssize_t actual_read_size_core = 0, actual_read_size_pkg = 0;

	if (cci->fd_perf_core != -1) {
		/* Each descriptor read begins with number of counters read. */
		expected_read_size += sizeof(unsigned long long);

		actual_read_size_core = read(cci->fd_perf_core, &perf_data_core[0], sizeof(perf_data_core));

		if (actual_read_size_core <= 0)
			err(-1, "%s: read perf %s: %ld", __func__, "core", actual_read_size_core);
	}

	if (cci->fd_perf_pkg != -1) {
		/* Each descriptor read begins with number of counters read. */
		expected_read_size += sizeof(unsigned long long);

		actual_read_size_pkg = read(cci->fd_perf_pkg, &perf_data_pkg[0], sizeof(perf_data_pkg));

		if (actual_read_size_pkg <= 0)
			err(-1, "%s: read perf %s: %ld", __func__, "pkg", actual_read_size_pkg);
	}

	const ssize_t actual_read_size_total = actual_read_size_core + actual_read_size_pkg;

	if (actual_read_size_total != expected_read_size)
		err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, actual_read_size_total);

	/*
	 * Copy ccstate and pcstate data into unified buffer.
	 *
	 * Skip first element from core and pkg buffers.
	 * Kernel puts there how many counters were read.
	 */
	const size_t num_core_counters = perf_data_core[0];
	const size_t num_pkg_counters = perf_data_pkg[0];

	assert(num_perf_counters == num_core_counters + num_pkg_counters);

	/* Copy ccstate perf data */
	memcpy(&perf_data[0], &perf_data_core[1], num_core_counters * sizeof(unsigned long long));

	/* Copy pcstate perf data */
	memcpy(&perf_data[num_core_counters], &perf_data_pkg[1], num_pkg_counters * sizeof(unsigned long long));

	/* pi indexes the merged perf_data; advances only for perf-sourced counters */
	for (unsigned int i = 0, pi = 0; i < NUM_CSTATE_COUNTERS; ++i) {
		switch (cci->source[i]) {
		case COUNTER_SOURCE_NONE:
			break;

		case COUNTER_SOURCE_PERF:
			assert(pi < ARRAY_SIZE(perf_data));
			assert(cci->fd_perf_core != -1 || cci->fd_perf_pkg != -1);

			if (debug >= 2)
				fprintf(stderr, "cstate via %s %u: %llu\n", "perf", i, perf_data[pi]);

			cci->data[i] = perf_data[pi];

			++pi;
			break;

		case COUNTER_SOURCE_MSR:
			assert(!no_msr);
			if (get_msr(cpu, cci->msr[i], &cci->data[i]))
				return -13 - i;

			if (debug >= 2)
				fprintf(stderr, "cstate via %s0x%llx %u: %llu\n", "msr", cci->msr[i], i, cci->data[i]);

			break;
		}
	}

	/*
	 * Helper to write the data only if the source of
	 * the counter for the current cpu is not none.
	 *
	 * Otherwise we would overwrite core data with 0 (default value),
	 * when invoked for the thread sibling.
	 */
#define PERF_COUNTER_WRITE_DATA(out_counter, index) do {	\
	if (cci->source[index] != COUNTER_SOURCE_NONE)		\
		out_counter = cci->data[index];			\
} while (0)

	BUILD_BUG_ON(NUM_CSTATE_COUNTERS != 11);

	PERF_COUNTER_WRITE_DATA(t->c1, CCSTATE_RCI_INDEX_C1_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(c->c3, CCSTATE_RCI_INDEX_C3_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(c->c6, CCSTATE_RCI_INDEX_C6_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(c->c7, CCSTATE_RCI_INDEX_C7_RESIDENCY);

	PERF_COUNTER_WRITE_DATA(p->pc2, PCSTATE_RCI_INDEX_C2_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(p->pc3, PCSTATE_RCI_INDEX_C3_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(p->pc6, PCSTATE_RCI_INDEX_C6_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(p->pc7, PCSTATE_RCI_INDEX_C7_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(p->pc8, PCSTATE_RCI_INDEX_C8_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(p->pc9, PCSTATE_RCI_INDEX_C9_RESIDENCY);
	PERF_COUNTER_WRITE_DATA(p->pc10, PCSTATE_RCI_INDEX_C10_RESIDENCY);

#undef PERF_COUNTER_WRITE_DATA

	return 0;
}
4624
msr_counter_info_count_perf(const struct msr_counter_info_t * mci)4625 size_t msr_counter_info_count_perf(const struct msr_counter_info_t *mci)
4626 {
4627 size_t ret = 0;
4628
4629 for (int i = 0; i < NUM_MSR_COUNTERS; ++i)
4630 if (mci->source[i] == COUNTER_SOURCE_PERF)
4631 ++ret;
4632
4633 return ret;
4634 }
4635
/*
 * get_smi_aperf_mperf()
 * Snapshot APERF, MPERF and the SMI count for one cpu, via a bulk perf
 * group read and/or individual MSR reads, storing the results into the
 * thread_data.  Returns 0 on success, negative on MSR read failure.
 */
int get_smi_aperf_mperf(unsigned int cpu, struct thread_data *t)
{
	unsigned long long perf_data[NUM_MSR_COUNTERS + 1];

	struct msr_counter_info_t *mci;

	if (debug >= 2)
		fprintf(stderr, "%s: cpu%d\n", __func__, cpu);

	assert(msr_counter_info);
	/* NOTE(review): "<=" permits cpu == size (one past the end) —
	 * confirm the array really has size+1 entries or this is off-by-one */
	assert(cpu <= msr_counter_info_size);

	mci = &msr_counter_info[cpu];

	ZERO_ARRAY(perf_data);
	ZERO_ARRAY(mci->data);

	if (mci->fd_perf != -1) {
		const size_t num_perf_counters = msr_counter_info_count_perf(mci);
		/* +1: a perf group read is prefixed with the counter count */
		const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long);
		const ssize_t actual_read_size = read(mci->fd_perf, &perf_data[0], sizeof(perf_data));

		if (actual_read_size != expected_read_size)
			err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size,
			    actual_read_size);
	}

	/* pi indexes perf_data, starting at 1 to skip the count header */
	for (unsigned int i = 0, pi = 1; i < NUM_MSR_COUNTERS; ++i) {
		switch (mci->source[i]) {
		case COUNTER_SOURCE_NONE:
			break;

		case COUNTER_SOURCE_PERF:
			assert(pi < ARRAY_SIZE(perf_data));
			assert(mci->fd_perf != -1);

			if (debug >= 2)
				fprintf(stderr, "Reading msr counter via perf at %u: %llu\n", i, perf_data[pi]);

			mci->data[i] = perf_data[pi];

			++pi;
			break;

		case COUNTER_SOURCE_MSR:
			assert(!no_msr);

			if (get_msr(cpu, mci->msr[i], &mci->data[i]))
				return -2 - i;

			mci->data[i] &= mci->msr_mask[i];

			if (debug >= 2)
				fprintf(stderr, "Reading msr counter via msr at %u: %llu\n", i, mci->data[i]);

			break;
		}
	}

	/* publish the snapshot to this thread's counters */
	BUILD_BUG_ON(NUM_MSR_COUNTERS != 3);
	t->aperf = mci->data[MSR_RCI_INDEX_APERF];
	t->mperf = mci->data[MSR_RCI_INDEX_MPERF];
	t->smi_count = mci->data[MSR_RCI_INDEX_SMI];

	return 0;
}
4702
perf_counter_info_read_values(struct perf_counter_info * pp,int cpu,unsigned long long * out,size_t out_size)4703 int perf_counter_info_read_values(struct perf_counter_info *pp, int cpu, unsigned long long *out, size_t out_size)
4704 {
4705 unsigned int domain;
4706 unsigned long long value;
4707 int fd_counter;
4708
4709 for (size_t i = 0; pp; ++i, pp = pp->next) {
4710 domain = cpu_to_domain(pp, cpu);
4711 assert(domain < pp->num_domains);
4712
4713 fd_counter = pp->fd_perf_per_domain[domain];
4714
4715 if (fd_counter == -1)
4716 continue;
4717
4718 if (read(fd_counter, &value, sizeof(value)) != sizeof(value))
4719 return 1;
4720
4721 assert(i < out_size);
4722 out[i] = value * pp->scale;
4723 }
4724
4725 return 0;
4726 }
4727
/*
 * pmt_gen_value_mask(lsb, msb)
 *
 * Return a 64-bit mask with bits [msb:lsb] set, inclusive.
 */
unsigned long pmt_gen_value_mask(unsigned int lsb, unsigned int msb)
{
	unsigned long mask;

	if (msb == 63)
		mask = 0xffffffffffffffff;
	else
		mask = ((1UL << (msb + 1)) - 1);	/* 1UL: shifting a 32-bit int by >= 31 is UB */

	mask -= (1UL << lsb) - 1;

	return mask;
}
4741
pmt_read_counter(struct pmt_counter * ppmt,unsigned int domain_id)4742 unsigned long pmt_read_counter(struct pmt_counter *ppmt, unsigned int domain_id)
4743 {
4744 if (domain_id >= ppmt->num_domains)
4745 return 0;
4746
4747 const unsigned long *pmmio = ppmt->domains[domain_id].pcounter;
4748 const unsigned long value = pmmio ? *pmmio : 0;
4749 const unsigned long value_mask = pmt_gen_value_mask(ppmt->lsb, ppmt->msb);
4750 const unsigned long value_shift = ppmt->lsb;
4751
4752 return (value & value_mask) >> value_shift;
4753 }
4754
4755 /*
4756 * get_counters(...)
4757 * migrate to cpu
4758 * acquire and record local counters for that cpu
4759 */
int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	int cpu = t->cpu_id;
	unsigned long long msr;
	struct msr_counter *mp;
	struct pmt_counter *pp;
	int i;
	int status;

	/* all MSR/perf/TSC reads below must execute on the target CPU itself */
	if (cpu_migrate(cpu)) {
		fprintf(outf, "%s: Could not migrate to CPU %d\n", __func__, cpu);
		return -1;
	}

	/* timestamp the collection window for this CPU */
	gettimeofday(&t->tv_begin, (struct timezone *)NULL);

	if (first_counter_read)
		get_apic_id(t);

	t->tsc = rdtsc();	/* we are running on local CPU of interest */

	get_smi_aperf_mperf(cpu, t);

	if (DO_BIC(BIC_IPC))
		if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long))
			return -4;

	/* irq/nmi totals were snapshotted earlier into the per-cpu arrays */
	if (DO_BIC(BIC_IRQ))
		t->irq_count = irqs_per_cpu[cpu];
	if (DO_BIC(BIC_NMI))
		t->nmi_count = nmi_per_cpu[cpu];

	get_cstate_counters(cpu, t, c, p);

	/* user-added thread-scope counters: sysfs/MSR, then perf, then PMT */
	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
		if (get_mp(cpu, mp, &t->counter[i], mp->sp->path))
			return -10;
	}

	if (perf_counter_info_read_values(sys.perf_tp, cpu, t->perf_counter, MAX_ADDED_THREAD_COUNTERS))
		return -10;

	for (i = 0, pp = sys.pmt_tp; pp; i++, pp = pp->next)
		t->pmt_counter[i] = pmt_read_counter(pp, t->cpu_id);

	/* collect core counters only for 1st thread in core */
	if (!is_cpu_first_thread_in_core(t, c, p))
		goto done;

	if (platform->has_per_core_rapl) {
		status = get_rapl_counters(cpu, c->core_id, c, p);
		if (status != 0)
			return status;
	}

	if (DO_BIC(BIC_CPU_c7) && t->is_atom) {
		/*
		 * For Atom CPUs that has core cstate deeper than c6,
		 * MSR_CORE_C6_RESIDENCY returns residency of cc6 and deeper.
		 * Minus CC7 (and deeper cstates) residency to get
		 * accturate cc6 residency.
		 */
		c->c6 -= c->c7;
	}

	if (DO_BIC(BIC_Mod_c6))
		if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
			return -8;

	if (DO_BIC(BIC_CoreTmp)) {
		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
			return -9;
		/* THERM_STATUS[22:16] is degrees below TJ_MAX */
		c->core_temp_c = tj_max - ((msr >> 16) & 0x7F);
	}

	if (DO_BIC(BIC_CORE_THROT_CNT))
		get_core_throt_cnt(cpu, &c->core_throt_cnt);

	/* user-added core-scope counters: sysfs/MSR, perf, PMT */
	for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
		if (get_mp(cpu, mp, &c->counter[i], mp->sp->path))
			return -10;
	}

	if (perf_counter_info_read_values(sys.perf_cp, cpu, c->perf_counter, MAX_ADDED_CORE_COUNTERS))
		return -10;

	for (i = 0, pp = sys.pmt_cp; pp; i++, pp = pp->next)
		c->pmt_counter[i] = pmt_read_counter(pp, c->core_id);

	/* collect package counters only for 1st core in package */
	if (!is_cpu_first_core_in_package(t, c, p))
		goto done;

	if (DO_BIC(BIC_Totl_c0)) {
		if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
			return -10;
	}
	if (DO_BIC(BIC_Any_c0)) {
		if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
			return -11;
	}
	if (DO_BIC(BIC_GFX_c0)) {
		if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
			return -12;
	}
	if (DO_BIC(BIC_CPUGFX)) {
		if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
			return -13;
	}

	/* LPI residencies come from cpuidle sysfs snapshots, not MSRs */
	if (DO_BIC(BIC_CPU_LPI))
		p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
	if (DO_BIC(BIC_SYS_LPI))
		p->sys_lpi = cpuidle_cur_sys_lpi_us;

	if (!platform->has_per_core_rapl) {
		status = get_rapl_counters(cpu, p->package_id, c, p);
		if (status != 0)
			return status;
	}

	if (DO_BIC(BIC_PkgTmp)) {
		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
			return -17;
		p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F);
	}

	if (DO_BIC(BIC_UNCORE_MHZ))
		p->uncore_mhz = get_legacy_uncore_mhz(p->package_id);

	/* graphics / SA-media values come from the pre-read gfx_info[] cache */
	if (DO_BIC(BIC_GFX_rc6))
		p->gfx_rc6_ms = gfx_info[GFX_rc6].val_ull;

	if (DO_BIC(BIC_GFXMHz))
		p->gfx_mhz = gfx_info[GFX_MHz].val;

	if (DO_BIC(BIC_GFXACTMHz))
		p->gfx_act_mhz = gfx_info[GFX_ACTMHz].val;

	if (DO_BIC(BIC_SAM_mc6))
		p->sam_mc6_ms = gfx_info[SAM_mc6].val_ull;

	if (DO_BIC(BIC_SAMMHz))
		p->sam_mhz = gfx_info[SAM_MHz].val;

	if (DO_BIC(BIC_SAMACTMHz))
		p->sam_act_mhz = gfx_info[SAM_ACTMHz].val;

	/* user-added package-scope counters; msr_num == 0 means sysfs-backed */
	for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
		char *path = NULL;

		if (mp->msr_num == 0) {
			path = find_sysfs_path_by_id(mp->sp, p->package_id);
			if (path == NULL) {
				warnx("%s: package_id %d not found", __func__, p->package_id);
				return -10;
			}
		}
		if (get_mp(cpu, mp, &p->counter[i], path))
			return -10;
	}

	if (perf_counter_info_read_values(sys.perf_pp, cpu, p->perf_counter, MAX_ADDED_PACKAGE_COUNTERS))
		return -10;

	for (i = 0, pp = sys.pmt_pp; pp; i++, pp = pp->next)
		p->pmt_counter[i] = pmt_read_counter(pp, p->package_id);

done:
	gettimeofday(&t->tv_end, (struct timezone *)NULL);

	return 0;
}
4933
/* decoded package C-state limit; index into pkg_cstate_limit_strings[] */
int pkg_cstate_limit = PCLUKN;
char *pkg_cstate_limit_strings[] = { "unknown", "reserved", "pc0", "pc1", "pc2",
	"pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited"
};

/*
 * Per-microarchitecture decode tables:
 * MSR_PKG_CST_CONFIG_CONTROL[3:0] -> deepest allowed package C-state (PCL* code).
 * The matching table is selected in probe_cst_limit().
 */

/* Nehalem */
int nhm_pkg_cstate_limits[16] =
    { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

/* Sandy Bridge */
int snb_pkg_cstate_limits[16] =
    { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

/* Haswell */
int hsw_pkg_cstate_limits[16] =
    { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

/* Silvermont */
int slv_pkg_cstate_limits[16] =
    { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCL__6, PCL__7
};

/* Airmont */
int amt_pkg_cstate_limits[16] =
    { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

/* Xeon Phi (Knights) */
int phi_pkg_cstate_limits[16] =
    { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

/* Goldmont */
int glm_pkg_cstate_limits[16] =
    { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

/* Skylake-X */
int skx_pkg_cstate_limits[16] =
    { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};

/* Ice Lake-X */
int icx_pkg_cstate_limits[16] =
    { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
	PCLRSV, PCLRSV
};
4983
probe_cst_limit(void)4984 void probe_cst_limit(void)
4985 {
4986 unsigned long long msr;
4987 int *pkg_cstate_limits;
4988
4989 if (!platform->has_nhm_msrs || no_msr)
4990 return;
4991
4992 switch (platform->cst_limit) {
4993 case CST_LIMIT_NHM:
4994 pkg_cstate_limits = nhm_pkg_cstate_limits;
4995 break;
4996 case CST_LIMIT_SNB:
4997 pkg_cstate_limits = snb_pkg_cstate_limits;
4998 break;
4999 case CST_LIMIT_HSW:
5000 pkg_cstate_limits = hsw_pkg_cstate_limits;
5001 break;
5002 case CST_LIMIT_SKX:
5003 pkg_cstate_limits = skx_pkg_cstate_limits;
5004 break;
5005 case CST_LIMIT_ICX:
5006 pkg_cstate_limits = icx_pkg_cstate_limits;
5007 break;
5008 case CST_LIMIT_SLV:
5009 pkg_cstate_limits = slv_pkg_cstate_limits;
5010 break;
5011 case CST_LIMIT_AMT:
5012 pkg_cstate_limits = amt_pkg_cstate_limits;
5013 break;
5014 case CST_LIMIT_KNL:
5015 pkg_cstate_limits = phi_pkg_cstate_limits;
5016 break;
5017 case CST_LIMIT_GMT:
5018 pkg_cstate_limits = glm_pkg_cstate_limits;
5019 break;
5020 default:
5021 return;
5022 }
5023
5024 get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
5025 pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
5026 }
5027
dump_platform_info(void)5028 static void dump_platform_info(void)
5029 {
5030 unsigned long long msr;
5031 unsigned int ratio;
5032
5033 if (!platform->has_nhm_msrs || no_msr)
5034 return;
5035
5036 get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
5037
5038 fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
5039
5040 ratio = (msr >> 40) & 0xFF;
5041 fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk);
5042
5043 ratio = (msr >> 8) & 0xFF;
5044 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
5045 }
5046
dump_power_ctl(void)5047 static void dump_power_ctl(void)
5048 {
5049 unsigned long long msr;
5050
5051 if (!platform->has_nhm_msrs || no_msr)
5052 return;
5053
5054 get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
5055 fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
5056 base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
5057
5058 /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */
5059 if (platform->has_cst_prewake_bit)
5060 fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN");
5061
5062 return;
5063 }
5064
dump_turbo_ratio_limit2(void)5065 static void dump_turbo_ratio_limit2(void)
5066 {
5067 unsigned long long msr;
5068 unsigned int ratio;
5069
5070 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
5071
5072 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
5073
5074 ratio = (msr >> 8) & 0xFF;
5075 if (ratio)
5076 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk);
5077
5078 ratio = (msr >> 0) & 0xFF;
5079 if (ratio)
5080 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk);
5081 return;
5082 }
5083
dump_turbo_ratio_limit1(void)5084 static void dump_turbo_ratio_limit1(void)
5085 {
5086 unsigned long long msr;
5087 unsigned int ratio;
5088
5089 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
5090
5091 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
5092
5093 ratio = (msr >> 56) & 0xFF;
5094 if (ratio)
5095 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk);
5096
5097 ratio = (msr >> 48) & 0xFF;
5098 if (ratio)
5099 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk);
5100
5101 ratio = (msr >> 40) & 0xFF;
5102 if (ratio)
5103 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n", ratio, bclk, ratio * bclk);
5104
5105 ratio = (msr >> 32) & 0xFF;
5106 if (ratio)
5107 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk);
5108
5109 ratio = (msr >> 24) & 0xFF;
5110 if (ratio)
5111 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk);
5112
5113 ratio = (msr >> 16) & 0xFF;
5114 if (ratio)
5115 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk);
5116
5117 ratio = (msr >> 8) & 0xFF;
5118 if (ratio)
5119 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk);
5120
5121 ratio = (msr >> 0) & 0xFF;
5122 if (ratio)
5123 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk);
5124 return;
5125 }
5126
dump_turbo_ratio_limits(int trl_msr_offset)5127 static void dump_turbo_ratio_limits(int trl_msr_offset)
5128 {
5129 unsigned long long msr, core_counts;
5130 int shift;
5131
5132 get_msr(base_cpu, trl_msr_offset, &msr);
5133 fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n",
5134 base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? "SECONDARY_" : "", msr);
5135
5136 if (platform->trl_msrs & TRL_CORECOUNT) {
5137 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
5138 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
5139 } else {
5140 core_counts = 0x0807060504030201;
5141 }
5142
5143 for (shift = 56; shift >= 0; shift -= 8) {
5144 unsigned int ratio, group_size;
5145
5146 ratio = (msr >> shift) & 0xFF;
5147 group_size = (core_counts >> shift) & 0xFF;
5148 if (ratio)
5149 fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
5150 ratio, bclk, ratio * bclk, group_size);
5151 }
5152
5153 return;
5154 }
5155
dump_atom_turbo_ratio_limits(void)5156 static void dump_atom_turbo_ratio_limits(void)
5157 {
5158 unsigned long long msr;
5159 unsigned int ratio;
5160
5161 get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);
5162 fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
5163
5164 ratio = (msr >> 0) & 0x3F;
5165 if (ratio)
5166 fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", ratio, bclk, ratio * bclk);
5167
5168 ratio = (msr >> 8) & 0x3F;
5169 if (ratio)
5170 fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", ratio, bclk, ratio * bclk);
5171
5172 ratio = (msr >> 16) & 0x3F;
5173 if (ratio)
5174 fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
5175
5176 get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
5177 fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
5178
5179 ratio = (msr >> 24) & 0x3F;
5180 if (ratio)
5181 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", ratio, bclk, ratio * bclk);
5182
5183 ratio = (msr >> 16) & 0x3F;
5184 if (ratio)
5185 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", ratio, bclk, ratio * bclk);
5186
5187 ratio = (msr >> 8) & 0x3F;
5188 if (ratio)
5189 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", ratio, bclk, ratio * bclk);
5190
5191 ratio = (msr >> 0) & 0x3F;
5192 if (ratio)
5193 fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", ratio, bclk, ratio * bclk);
5194 }
5195
/*
 * Decode and print the delta-encoded KNL MSR_TURBO_RATIO_LIMIT:
 * bucket 1 is absolute; buckets 2..7 are deltas off the previous bucket.
 */
static void dump_knl_turbo_ratio_limits(void)
{
	const unsigned int buckets_no = 7;

	unsigned long long msr;
	int delta_cores, delta_ratio;
	int i, b_nr;
	unsigned int cores[buckets_no];
	unsigned int ratio[buckets_no];

	get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);

	fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);

	/*
	 * Turbo encoding in KNL is as follows:
	 * [0] -- Reserved
	 * [7:1] -- Base value of number of active cores of bucket 1.
	 * [15:8] -- Base value of freq ratio of bucket 1.
	 * [20:16] -- +ve delta of number of active cores of bucket 2.
	 * i.e. active cores of bucket 2 =
	 * active cores of bucket 1 + delta
	 * [23:21] -- Negative delta of freq ratio of bucket 2.
	 * i.e. freq ratio of bucket 2 =
	 * freq ratio of bucket 1 - delta
	 * [28:24]-- +ve delta of number of active cores of bucket 3.
	 * [31:29]-- -ve delta of freq ratio of bucket 3.
	 * [36:32]-- +ve delta of number of active cores of bucket 4.
	 * [39:37]-- -ve delta of freq ratio of bucket 4.
	 * [44:40]-- +ve delta of number of active cores of bucket 5.
	 * [47:45]-- -ve delta of freq ratio of bucket 5.
	 * [52:48]-- +ve delta of number of active cores of bucket 6.
	 * [55:53]-- -ve delta of freq ratio of bucket 6.
	 * [60:56]-- +ve delta of number of active cores of bucket 7.
	 * [63:61]-- -ve delta of freq ratio of bucket 7.
	 */

	/* bucket 1 is encoded directly */
	b_nr = 0;
	cores[b_nr] = (msr & 0xFF) >> 1;
	ratio[b_nr] = (msr >> 8) & 0xFF;

	/* buckets 2..7: 8 bits each = 5-bit core delta + 3-bit ratio delta */
	for (i = 16; i < 64; i += 8) {
		delta_cores = (msr >> i) & 0x1F;
		delta_ratio = (msr >> (i + 5)) & 0x7;

		cores[b_nr + 1] = cores[b_nr] + delta_cores;
		ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
		b_nr++;
	}

	/* print highest bucket first; skip buckets whose ratio repeats the previous bucket's */
	for (i = buckets_no - 1; i >= 0; i--)
		if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
			fprintf(outf,
				"%d * %.1f = %.1f MHz max turbo %d active cores\n",
				ratio[i], bclk, ratio[i] * bclk, cores[i]);
}
5252
dump_cst_cfg(void)5253 static void dump_cst_cfg(void)
5254 {
5255 unsigned long long msr;
5256
5257 if (!platform->has_nhm_msrs || no_msr)
5258 return;
5259
5260 get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
5261
5262 fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
5263
5264 fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
5265 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
5266 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
5267 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
5268 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
5269 (msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]);
5270
5271 #define AUTOMATIC_CSTATE_CONVERSION (1UL << 16)
5272 if (platform->has_cst_auto_convension) {
5273 fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
5274 }
5275
5276 fprintf(outf, ")\n");
5277
5278 return;
5279 }
5280
dump_config_tdp(void)5281 static void dump_config_tdp(void)
5282 {
5283 unsigned long long msr;
5284
5285 get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
5286 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
5287 fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
5288
5289 get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
5290 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
5291 if (msr) {
5292 fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
5293 fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
5294 fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
5295 fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
5296 }
5297 fprintf(outf, ")\n");
5298
5299 get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
5300 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
5301 if (msr) {
5302 fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
5303 fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
5304 fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
5305 fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
5306 }
5307 fprintf(outf, ")\n");
5308
5309 get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
5310 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
5311 if ((msr) & 0x3)
5312 fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
5313 fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
5314 fprintf(outf, ")\n");
5315
5316 get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
5317 fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
5318 fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
5319 fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
5320 fprintf(outf, ")\n");
5321 }
5322
/* ns per IRTL time unit; indexed by the MSR_PKGCn_IRTL time-unit field (see print_irtl()) */
unsigned int irtl_time_units[] = { 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
5324
print_irtl(void)5325 void print_irtl(void)
5326 {
5327 unsigned long long msr;
5328
5329 if (!platform->has_irtl_msrs || no_msr)
5330 return;
5331
5332 if (platform->supported_cstates & PC3) {
5333 get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
5334 fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
5335 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
5336 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
5337 }
5338
5339 if (platform->supported_cstates & PC6) {
5340 get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
5341 fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
5342 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
5343 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
5344 }
5345
5346 if (platform->supported_cstates & PC7) {
5347 get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
5348 fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
5349 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
5350 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
5351 }
5352
5353 if (platform->supported_cstates & PC8) {
5354 get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
5355 fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
5356 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
5357 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
5358 }
5359
5360 if (platform->supported_cstates & PC9) {
5361 get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
5362 fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
5363 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
5364 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
5365 }
5366
5367 if (platform->supported_cstates & PC10) {
5368 get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
5369 fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
5370 fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
5371 (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
5372 }
5373 }
5374
free_fd_percpu(void)5375 void free_fd_percpu(void)
5376 {
5377 int i;
5378
5379 if (!fd_percpu)
5380 return;
5381
5382 for (i = 0; i < topo.max_cpu_num + 1; ++i) {
5383 if (fd_percpu[i] != 0)
5384 close(fd_percpu[i]);
5385 }
5386
5387 free(fd_percpu);
5388 fd_percpu = NULL;
5389 }
5390
free_fd_instr_count_percpu(void)5391 void free_fd_instr_count_percpu(void)
5392 {
5393 if (!fd_instr_count_percpu)
5394 return;
5395
5396 for (int i = 0; i < topo.max_cpu_num + 1; ++i) {
5397 if (fd_instr_count_percpu[i] != 0)
5398 close(fd_instr_count_percpu[i]);
5399 }
5400
5401 free(fd_instr_count_percpu);
5402 fd_instr_count_percpu = NULL;
5403 }
5404
free_fd_cstate(void)5405 void free_fd_cstate(void)
5406 {
5407 if (!ccstate_counter_info)
5408 return;
5409
5410 const int counter_info_num = ccstate_counter_info_size;
5411
5412 for (int counter_id = 0; counter_id < counter_info_num; ++counter_id) {
5413 if (ccstate_counter_info[counter_id].fd_perf_core != -1)
5414 close(ccstate_counter_info[counter_id].fd_perf_core);
5415
5416 if (ccstate_counter_info[counter_id].fd_perf_pkg != -1)
5417 close(ccstate_counter_info[counter_id].fd_perf_pkg);
5418 }
5419
5420 free(ccstate_counter_info);
5421 ccstate_counter_info = NULL;
5422 ccstate_counter_info_size = 0;
5423 }
5424
free_fd_msr(void)5425 void free_fd_msr(void)
5426 {
5427 if (!msr_counter_info)
5428 return;
5429
5430 for (int cpu = 0; cpu < topo.max_cpu_num; ++cpu) {
5431 if (msr_counter_info[cpu].fd_perf != -1)
5432 close(msr_counter_info[cpu].fd_perf);
5433 }
5434
5435 free(msr_counter_info);
5436 msr_counter_info = NULL;
5437 msr_counter_info_size = 0;
5438 }
5439
free_fd_rapl_percpu(void)5440 void free_fd_rapl_percpu(void)
5441 {
5442 if (!rapl_counter_info_perdomain)
5443 return;
5444
5445 const int num_domains = rapl_counter_info_perdomain_size;
5446
5447 for (int domain_id = 0; domain_id < num_domains; ++domain_id) {
5448 if (rapl_counter_info_perdomain[domain_id].fd_perf != -1)
5449 close(rapl_counter_info_perdomain[domain_id].fd_perf);
5450 }
5451
5452 free(rapl_counter_info_perdomain);
5453 rapl_counter_info_perdomain = NULL;
5454 rapl_counter_info_perdomain_size = 0;
5455 }
5456
free_fd_added_perf_counters_(struct perf_counter_info * pp)5457 void free_fd_added_perf_counters_(struct perf_counter_info *pp)
5458 {
5459 if (!pp)
5460 return;
5461
5462 if (!pp->fd_perf_per_domain)
5463 return;
5464
5465 while (pp) {
5466 for (size_t domain = 0; domain < pp->num_domains; ++domain) {
5467 if (pp->fd_perf_per_domain[domain] != -1) {
5468 close(pp->fd_perf_per_domain[domain]);
5469 pp->fd_perf_per_domain[domain] = -1;
5470 }
5471 }
5472
5473 free(pp->fd_perf_per_domain);
5474 pp->fd_perf_per_domain = NULL;
5475
5476 pp = pp->next;
5477 }
5478 }
5479
/* Close perf fds for all user-added thread-, core- and package-scope counters. */
void free_fd_added_perf_counters(void)
{
	free_fd_added_perf_counters_(sys.perf_tp);
	free_fd_added_perf_counters_(sys.perf_cp);
	free_fd_added_perf_counters_(sys.perf_pp);
}
5486
/*
 * Release every global buffer and fd table allocated during topology
 * probing and counter setup, NULLing the globals as it goes (so a
 * subsequent re-probe starts clean).
 */
void free_all_buffers(void)
{
	int i;

	/* cpu set masks */
	CPU_FREE(cpu_present_set);
	cpu_present_set = NULL;
	cpu_present_setsize = 0;

	CPU_FREE(cpu_effective_set);
	cpu_effective_set = NULL;
	cpu_effective_setsize = 0;

	CPU_FREE(cpu_allowed_set);
	cpu_allowed_set = NULL;
	cpu_allowed_setsize = 0;

	CPU_FREE(cpu_affinity_set);
	cpu_affinity_set = NULL;
	cpu_affinity_setsize = 0;

	/* double-buffered counter snapshots (even/odd) */
	free(thread_even);
	free(core_even);
	free(package_even);

	thread_even = NULL;
	core_even = NULL;
	package_even = NULL;

	free(thread_odd);
	free(core_odd);
	free(package_odd);

	thread_odd = NULL;
	core_odd = NULL;
	package_odd = NULL;

	free(output_buffer);
	output_buffer = NULL;
	outp = NULL;

	/* file descriptor tables */
	free_fd_percpu();
	free_fd_instr_count_percpu();
	free_fd_msr();
	free_fd_rapl_percpu();
	free_fd_cstate();
	free_fd_added_perf_counters();

	/* /proc/interrupts bookkeeping */
	free(irq_column_2_cpu);
	free(irqs_per_cpu);
	free(nmi_per_cpu);

	/* per-cpu topology, including each cpu's sibling mask */
	for (i = 0; i <= topo.max_cpu_num; ++i) {
		if (cpus[i].put_ids)
			CPU_FREE(cpus[i].put_ids);
	}
	free(cpus);
}
5544
5545 /*
5546 * Parse a file containing a single int.
5547 * Return 0 if file can not be opened
5548 * Exit if file can be opened, but can not be parsed
5549 */
parse_int_file(const char * fmt,...)5550 int parse_int_file(const char *fmt, ...)
5551 {
5552 va_list args;
5553 char path[PATH_MAX];
5554 FILE *filep;
5555 int value;
5556
5557 va_start(args, fmt);
5558 vsnprintf(path, sizeof(path), fmt, args);
5559 va_end(args);
5560 filep = fopen(path, "r");
5561 if (!filep)
5562 return 0;
5563 if (fscanf(filep, "%d", &value) != 1)
5564 err(1, "%s: failed to parse number from file", path);
5565 fclose(filep);
5566 return value;
5567 }
5568
5569 /*
5570 * cpu_is_first_core_in_package(cpu)
5571 * return 1 if given CPU is 1st core in package
5572 */
int cpu_is_first_core_in_package(int cpu)
{
	/* parse_int_file() reads the leading integer of the siblings list;
	 * when that lowest-numbered sibling is the cpu itself, it is the
	 * first core in its package */
	return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);
}
5577
/* Return @cpu's physical package id from sysfs topology (0 if unreadable). */
int get_physical_package_id(int cpu)
{
	return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
}
5582
/* Return @cpu's die id from sysfs topology (0 if unreadable). */
int get_die_id(int cpu)
{
	return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu);
}
5587
/* Return @cpu's core id from sysfs topology (0 if unreadable). */
int get_core_id(int cpu)
{
	return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
}
5592
/*
 * Assign a per-package logical node id to every cpu: within each package,
 * physical NUMA nodes are numbered 0, 1, ... in order of first appearance.
 * Also tracks the maximum nodes-per-package seen in topo.nodes_per_pkg.
 */
void set_node_data(void)
{
	int pkg, node, lnode, cpu, cpux;
	int cpu_count;

	/* initialize logical_node_id */
	for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu)
		cpus[cpu].logical_node_id = -1;

	cpu_count = 0;
	for (pkg = 0; pkg < topo.num_packages; pkg++) {
		lnode = 0;
		for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
			if (cpus[cpu].physical_package_id != pkg)
				continue;
			/* find a cpu with an unset logical_node_id */
			if (cpus[cpu].logical_node_id != -1)
				continue;
			/* this cpu starts a new logical node within the package */
			cpus[cpu].logical_node_id = lnode;
			node = cpus[cpu].physical_node_id;
			cpu_count++;
			/*
			 * find all matching cpus on this pkg and set
			 * the logical_node_id
			 */
			for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) {
				if ((cpus[cpux].physical_package_id == pkg) && (cpus[cpux].physical_node_id == node)) {
					cpus[cpux].logical_node_id = lnode;
					cpu_count++;
				}
			}
			lnode++;
			if (lnode > topo.nodes_per_pkg)
				topo.nodes_per_pkg = lnode;
		}
		/* all cpus assigned -- no need to scan remaining packages */
		if (cpu_count >= topo.max_cpu_num)
			break;
	}
}
5632
get_physical_node_id(struct cpu_topology * thiscpu)5633 int get_physical_node_id(struct cpu_topology *thiscpu)
5634 {
5635 char path[80];
5636 FILE *filep;
5637 int i;
5638 int cpu = thiscpu->logical_cpu_id;
5639
5640 for (i = 0; i <= topo.max_cpu_num; i++) {
5641 sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", cpu, i);
5642 filep = fopen(path, "r");
5643 if (!filep)
5644 continue;
5645 fclose(filep);
5646 return i;
5647 }
5648 return -1;
5649 }
5650
/*
 * parse_cpu_str(cpu_str, cpu_set, cpu_set_size)
 *
 * Parse a cpu-list string (e.g. "0-3,8,10..12") into @cpu_set.
 * Entries are comma-separated; each entry is a single cpu number or a
 * range written "A-B" or "A..B" (B must be > A).
 * Returns 0 on success, 1 on malformed input or any cpu number
 * >= CPU_SUBSET_MAXCPUS.
 */
static int parse_cpu_str(char *cpu_str, cpu_set_t *cpu_set, int cpu_set_size)
{
	unsigned int start, end;
	char *next = cpu_str;

	while (next && *next) {

		if (*next == '-')	/* no negative cpu numbers */
			return 1;

		if (*next == '\0' || *next == '\n')
			break;

		start = strtoul(next, &next, 10);

		if (start >= CPU_SUBSET_MAXCPUS)
			return 1;
		CPU_SET_S(start, cpu_set_size, cpu_set);

		/* string may legitimately end right after a number */
		if (*next == '\0' || *next == '\n')
			break;

		if (*next == ',') {
			next += 1;
			continue;
		}

		/* range separator: "-" or ".." */
		if (*next == '-') {
			next += 1;	/* start range */
		} else if (*next == '.') {
			next += 1;
			if (*next == '.')
				next += 1;	/* start range */
			else
				return 1;
		}

		end = strtoul(next, &next, 10);
		if (end <= start)
			return 1;

		/* set the rest of the range; start itself is already set */
		while (++start <= end) {
			if (start >= CPU_SUBSET_MAXCPUS)
				return 1;
			CPU_SET_S(start, cpu_set_size, cpu_set);
		}

		if (*next == ',')
			next += 1;
		else if (*next != '\0' && *next != '\n')
			return 1;
	}

	return 0;
}
5706
/*
 * Read /sys/.../cpuN/topology/thread_siblings (a comma-separated hex
 * bitmask, most-significant word first) into thiscpu->put_ids, and assign
 * thread ids within the core: this cpu gets the next free id, then each
 * sibling sharing its physical core gets a subsequent id.
 * Returns the number of sibling cpus found, or -1 on error.
 */
int get_thread_siblings(struct cpu_topology *thiscpu)
{
	char path[80], character;
	FILE *filep;
	unsigned long map;
	int so, shift, sib_core;
	int cpu = thiscpu->logical_cpu_id;
	int offset = topo.max_cpu_num + 1;	/* bit offset of the NEXT mask word to be read */
	size_t size;
	int thread_id = 0;

	thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1));
	if (thiscpu->thread_id < 0)
		thiscpu->thread_id = thread_id++;
	if (!thiscpu->put_ids)
		return -1;

	size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(size, thiscpu->put_ids);

	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
	filep = fopen(path, "r");

	if (!filep) {
		warnx("%s: open failed", path);
		return -1;
	}
	do {
		/* each %lx word covers BITMASK_SIZE cpus; words arrive high-to-low */
		offset -= BITMASK_SIZE;
		if (fscanf(filep, "%lx%c", &map, &character) != 2)
			err(1, "%s: failed to parse file", path);
		for (shift = 0; shift < BITMASK_SIZE; shift++) {
			if ((map >> shift) & 0x1) {
				so = shift + offset;	/* cpu number of this sibling bit */
				sib_core = get_core_id(so);
				if (sib_core == thiscpu->physical_core_id) {
					CPU_SET_S(so, size, thiscpu->put_ids);
					if ((so != cpu) && (cpus[so].thread_id < 0))
						cpus[so].thread_id = thread_id++;
				}
			}
		}
	} while (character == ',');	/* more mask words follow */
	fclose(filep);

	return CPU_COUNT_S(size, thiscpu->put_ids);
}
5754
5755 /*
5756 * run func(thread, core, package) in topology order
5757 * skip non-present cpus
5758 */
5759
int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
	struct pkg_data *, struct thread_data *, struct core_data *,
	struct pkg_data *), struct thread_data *thread_base,
	struct core_data *core_base, struct pkg_data *pkg_base,
	struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2)
{
	int retval, pkg_no, node_no, core_no, thread_no;

	retval = 0;

	/* iterate package -> node -> core -> thread, as the header comment says */
	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
		for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
			for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
				for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) {
					struct thread_data *t, *t2;
					struct core_data *c, *c2;
					struct pkg_data *p, *p2;

					t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);

					/* skip cpus outside the allowed (affinity/cpuset) set */
					if (cpu_is_not_allowed(t->cpu_id))
						continue;

					t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no);

					c = GET_CORE(core_base, core_no, node_no, pkg_no);
					c2 = GET_CORE(core_base2, core_no, node_no, pkg_no);

					p = GET_PKG(pkg_base, pkg_no);
					p2 = GET_PKG(pkg_base2, pkg_no);

					/* OR results so any failure is reported after the full walk */
					retval |= func(t, c, p, t2, c2, p2);
				}
			}
		}
	}
	return retval;
}
5798
5799 /*
5800 * run func(cpu) on every cpu in /proc/stat
5801 * return max_cpu number
5802 */
int for_all_proc_cpus(int (func) (int))
{
	FILE *fp;
	int cpu_num;
	int retval;

	fp = fopen_or_die(proc_stat, "r");

	/* skip the aggregate "cpu " summary line; fscanf must consume it fully */
	retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
	if (retval != 0)
		err(1, "%s: failed to parse format", proc_stat);

	/* one "cpuN ..." line per online cpu; invoke func(N) for each */
	while (1) {
		retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
		if (retval != 1)
			break;

		retval = func(cpu_num);
		if (retval) {
			/* propagate the callback's non-zero result immediately */
			fclose(fp);
			return (retval);
		}
	}
	fclose(fp);
	return 0;
}
5829
5830 #define PATH_EFFECTIVE_CPUS "/sys/fs/cgroup/cpuset.cpus.effective"
5831
5832 static char cpu_effective_str[1024];
5833
/*
 * update_effective_str()
 * Refresh cpu_effective_str from the cgroup cpuset.cpus.effective file.
 * Return 1 if the effective cpu list changed (caller must re-initialize),
 * else 0.  At startup the cache is populated even when previously empty.
 */
static int update_effective_str(bool startup)
{
	FILE *fp;
	char *pos;
	char buf[1024];
	int ret;

	/* an empty cache after startup means cgroup v2 is absent; nothing to track */
	if (cpu_effective_str[0] == '\0' && !startup)
		return 0;

	fp = fopen(PATH_EFFECTIVE_CPUS, "r");
	if (!fp)
		return 0;

	pos = fgets(buf, 1024, fp);
	if (!pos)
		err(1, "%s: file read failed\n", PATH_EFFECTIVE_CPUS);

	fclose(fp);

	/* unchanged since last snapshot: no restart needed */
	ret = strncmp(cpu_effective_str, buf, 1024);
	if (!ret)
		return 0;

	strncpy(cpu_effective_str, buf, 1024);
	return 1;
}
5861
/*
 * update_effective_set()
 * Refresh the cached effective-cpus string, then parse it into
 * cpu_effective_set.  Exits on a malformed cpu list.
 */
static void update_effective_set(bool startup)
{
	update_effective_str(startup);

	if (parse_cpu_str(cpu_effective_str, cpu_effective_set, cpu_effective_setsize))
		err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS, cpu_effective_str);
}
5869
5870 void linux_perf_init(void);
5871 void msr_perf_init(void);
5872 void rapl_perf_init(void);
5873 void cstate_perf_init(void);
5874 void added_perf_counters_init(void);
5875 void pmt_init(void);
5876
/*
 * re_initialize()
 * Tear down and rebuild all per-cpu buffers and counter sources after a
 * topology/configuration change (cpu hotplug, cpuset change, etc.).
 */
void re_initialize(void)
{
	free_all_buffers();
	setup_all_buffers(false);
	linux_perf_init();
	msr_perf_init();
	rapl_perf_init();
	cstate_perf_init();
	added_perf_counters_init();
	pmt_init();
	fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus,
		topo.allowed_cpus);
}
5890
/*
 * set_max_cpu_num()
 * Derive topo.max_cpu_num (0-based) from the width of this cpu's
 * thread_siblings bitmask: each comma-separated word in the sysfs file
 * represents BITMASK_SIZE possible cpus.
 */
void set_max_cpu_num(void)
{
	FILE *filep;
	int base_cpu;
	unsigned long dummy;
	char pathname[64];

	base_cpu = sched_getcpu();
	if (base_cpu < 0)
		err(1, "cannot find calling cpu ID");
	sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", base_cpu);

	filep = fopen_or_die(pathname, "r");
	topo.max_cpu_num = 0;
	/* count mask words; values are irrelevant, only the word count matters */
	while (fscanf(filep, "%lx,", &dummy) == 1)
		topo.max_cpu_num += BITMASK_SIZE;
	fclose(filep);
	topo.max_cpu_num--;	/* 0 based */
}
5910
5911 /*
5912 * count_cpus()
5913 * remember the last one seen, it will be the max
5914 */
int count_cpus(int cpu)
{
	UNUSED(cpu);

	/* tally every cpu reported by /proc/stat */
	topo.num_cpus++;
	return 0;
}
5922
/* for_all_proc_cpus() callback: record cpu in the global present set */
int mark_cpu_present(int cpu)
{
	CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
	return 0;
}
5928
/* for_all_proc_cpus() callback: mark thread_id unassigned (-1) before
 * get_thread_siblings() hands out per-core thread ids */
int init_thread_id(int cpu)
{
	cpus[cpu].thread_id = -1;
	return 0;
}
5934
set_my_cpu_type(void)5935 int set_my_cpu_type(void)
5936 {
5937 unsigned int eax, ebx, ecx, edx;
5938 unsigned int max_level;
5939
5940 __cpuid(0, max_level, ebx, ecx, edx);
5941
5942 if (max_level < CPUID_LEAF_MODEL_ID)
5943 return 0;
5944
5945 __cpuid(CPUID_LEAF_MODEL_ID, eax, ebx, ecx, edx);
5946
5947 return (eax >> CPUID_LEAF_MODEL_ID_CORE_TYPE_SHIFT);
5948 }
5949
/*
 * set_cpu_hybrid_type()
 * Migrate onto the given cpu, then record its hybrid core type (from
 * CPUID executed on that cpu) in cpus[cpu].type.
 * Return 0 on success, -1 if migration failed.
 */
int set_cpu_hybrid_type(int cpu)
{
	if (cpu_migrate(cpu))
		return -1;

	int type = set_my_cpu_type();

	cpus[cpu].type = type;
	return 0;
}
5960
5961 /*
5962 * snapshot_proc_interrupts()
5963 *
5964 * read and record summary of /proc/interrupts
5965 *
5966 * return 1 if config change requires a restart, else return 0
5967 */
snapshot_proc_interrupts(void)5968 int snapshot_proc_interrupts(void)
5969 {
5970 static FILE *fp;
5971 int column, retval;
5972
5973 if (fp == NULL)
5974 fp = fopen_or_die("/proc/interrupts", "r");
5975 else
5976 rewind(fp);
5977
5978 /* read 1st line of /proc/interrupts to get cpu* name for each column */
5979 for (column = 0; column < topo.num_cpus; ++column) {
5980 int cpu_number;
5981
5982 retval = fscanf(fp, " CPU%d", &cpu_number);
5983 if (retval != 1)
5984 break;
5985
5986 if (cpu_number > topo.max_cpu_num) {
5987 warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
5988 return 1;
5989 }
5990
5991 irq_column_2_cpu[column] = cpu_number;
5992 irqs_per_cpu[cpu_number] = 0;
5993 nmi_per_cpu[cpu_number] = 0;
5994 }
5995
5996 /* read /proc/interrupt count lines and sum up irqs per cpu */
5997 while (1) {
5998 int column;
5999 char buf[64];
6000 int this_row_is_nmi = 0;
6001
6002 retval = fscanf(fp, " %s:", buf); /* irq# "N:" */
6003 if (retval != 1)
6004 break;
6005
6006 if (strncmp(buf, "NMI", strlen("NMI")) == 0)
6007 this_row_is_nmi = 1;
6008
6009 /* read the count per cpu */
6010 for (column = 0; column < topo.num_cpus; ++column) {
6011
6012 int cpu_number, irq_count;
6013
6014 retval = fscanf(fp, " %d", &irq_count);
6015
6016 if (retval != 1)
6017 break;
6018
6019 cpu_number = irq_column_2_cpu[column];
6020 irqs_per_cpu[cpu_number] += irq_count;
6021 if (this_row_is_nmi)
6022 nmi_per_cpu[cpu_number] += irq_count;
6023 }
6024 while (getc(fp) != '\n') ; /* flush interrupt description */
6025
6026 }
6027 return 0;
6028 }
6029
6030 /*
6031 * snapshot_graphics()
6032 *
6033 * record snapshot of specified graphics sysfs knob
6034 *
6035 * return 1 if config change requires a restart, else return 0
6036 */
int snapshot_graphics(int idx)
{
	int retval;

	/* rewind the cached sysfs stream so we re-read a fresh value */
	rewind(gfx_info[idx].fp);
	fflush(gfx_info[idx].fp);

	switch (idx) {
	case GFX_rc6:
	case SAM_mc6:
		/* residency counters are 64-bit milliseconds */
		retval = fscanf(gfx_info[idx].fp, "%lld", &gfx_info[idx].val_ull);
		if (retval != 1)
			err(1, "rc6");
		return 0;
	case GFX_MHz:
	case GFX_ACTMHz:
	case SAM_MHz:
	case SAM_ACTMHz:
		/* frequency knobs are plain ints, in MHz */
		retval = fscanf(gfx_info[idx].fp, "%d", &gfx_info[idx].val);
		if (retval != 1)
			err(1, "MHz");
		return 0;
	default:
		return -EINVAL;
	}
}
6063
6064 /*
6065 * snapshot_cpu_lpi()
6066 *
6067 * record snapshot of
6068 * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
6069 */
int snapshot_cpu_lpi_us(void)
{
	FILE *fp;
	int retval;

	fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");

	retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
	if (retval != 1) {
		/* unparsable: permanently drop the CPU%LPI column */
		fprintf(stderr, "Disabling Low Power Idle CPU output\n");
		BIC_NOT_PRESENT(BIC_CPU_LPI);
		fclose(fp);
		return -1;
	}

	fclose(fp);

	return 0;
}
6089
6090 /*
6091 * snapshot_sys_lpi()
6092 *
6093 * record snapshot of sys_lpi_file
6094 */
int snapshot_sys_lpi_us(void)
{
	FILE *fp;
	int retval;

	fp = fopen_or_die(sys_lpi_file, "r");

	retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
	if (retval != 1) {
		/* unparsable: permanently drop the SYS%LPI column */
		fprintf(stderr, "Disabling Low Power Idle System output\n");
		BIC_NOT_PRESENT(BIC_SYS_LPI);
		fclose(fp);
		return -1;
	}
	fclose(fp);

	return 0;
}
6113
6114 /*
6115 * snapshot /proc and /sys files
6116 *
6117 * return 1 if configuration restart needed, else return 0
6118 */
int snapshot_proc_sysfs_files(void)
{
	/* timestamp the start of this snapshot pass */
	gettimeofday(&procsysfs_tv_begin, (struct timezone *)NULL);

	/* only /proc/interrupts parsing can demand a restart */
	if (DO_BIC(BIC_IRQ) || DO_BIC(BIC_NMI))
		if (snapshot_proc_interrupts())
			return 1;

	if (DO_BIC(BIC_GFX_rc6))
		snapshot_graphics(GFX_rc6);

	if (DO_BIC(BIC_GFXMHz))
		snapshot_graphics(GFX_MHz);

	if (DO_BIC(BIC_GFXACTMHz))
		snapshot_graphics(GFX_ACTMHz);

	if (DO_BIC(BIC_SAM_mc6))
		snapshot_graphics(SAM_mc6);

	if (DO_BIC(BIC_SAMMHz))
		snapshot_graphics(SAM_MHz);

	if (DO_BIC(BIC_SAMACTMHz))
		snapshot_graphics(SAM_ACTMHz);

	if (DO_BIC(BIC_CPU_LPI))
		snapshot_cpu_lpi_us();

	if (DO_BIC(BIC_SYS_LPI))
		snapshot_sys_lpi_us();

	return 0;
}
6153
6154 int exit_requested;
6155
/*
 * signal_handler()
 * SIGINT requests a clean exit after the current interval;
 * SIGUSR1 merely wakes the main loop (used to trigger output).
 */
static void signal_handler(int signal)
{
	switch (signal) {
	case SIGINT:
		exit_requested = 1;	/* polled by turbostat_loop() */
		if (debug)
			fprintf(stderr, " SIGINT\n");
		break;
	case SIGUSR1:
		if (debug > 1)
			fprintf(stderr, "SIGUSR1\n");
		break;
	}
}
6170
/* install signal_handler() for SIGINT and SIGUSR1; exit on failure */
void setup_signal_handler(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));

	sa.sa_handler = &signal_handler;

	if (sigaction(SIGINT, &sa, NULL) < 0)
		err(1, "sigaction SIGINT");
	if (sigaction(SIGUSR1, &sa, NULL) < 0)
		err(1, "sigaction SIGUSR1");
}
6184
/*
 * do_sleep()
 * Wait one measurement interval.  Unless stdin is being ignored, also
 * watch stdin: 'q' requests exit, EOF (closed pipe) switches to plain
 * sleeping for the remainder of this and all future intervals.
 */
void do_sleep(void)
{
	struct timeval tout;
	struct timespec rest;
	fd_set readfds;
	int retval;

	FD_ZERO(&readfds);
	FD_SET(0, &readfds);

	if (ignore_stdin) {
		nanosleep(&interval_ts, NULL);
		return;
	}

	/* select() sleeps up to the interval, or returns early on stdin input */
	tout = interval_tv;
	retval = select(1, &readfds, NULL, NULL, &tout);

	if (retval == 1) {
		switch (getc(stdin)) {
		case 'q':
			exit_requested = 1;
			break;
		case EOF:
			/*
			 * 'stdin' is a pipe closed on the other end. There
			 * won't be any further input.
			 */
			ignore_stdin = 1;
			/* Sleep the rest of the time */
			rest.tv_sec = (tout.tv_sec + tout.tv_usec / 1000000);
			rest.tv_nsec = (tout.tv_usec % 1000000) * 1000;
			nanosleep(&rest, NULL);
		}
	}
}
6221
/*
 * get_msr_sum()
 * Return, via *msr, the 64-bit running total for a 32-bit wrapping MSR:
 * the periodically-accumulated sum plus the delta since the last sample.
 * Return 0 on success, non-zero when sums are unavailable or the read fails.
 */
int get_msr_sum(int cpu, off_t offset, unsigned long long *msr)
{
	int ret, idx;
	unsigned long long msr_cur, msr_last;

	assert(!no_msr);

	if (!per_cpu_msr_sum)
		return 1;	/* msr_sum_record() not set up (or failed) */

	idx = offset_to_idx(offset);
	if (idx < 0)
		return idx;
	/* get_msr_sum() = sum + (get_msr() - last) */
	ret = get_msr(cpu, offset, &msr_cur);
	if (ret)
		return ret;
	msr_last = per_cpu_msr_sum[cpu].entries[idx].last;
	DELTA_WRAP32(msr_cur, msr_last);	/* msr_last becomes the wrapped delta */
	*msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum;

	return 0;
}
6245
6246 timer_t timerid;
6247
6248 /* Timer callback, update the sum of MSRs periodically. */
/*
 * update_msr_sum()
 * Per-cpu timer callback body: for each tracked wrapping MSR, fold the
 * delta since the previous sample into the 64-bit running sum and
 * remember the new 32-bit raw value.
 */
static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	int i, ret;
	int cpu = t->cpu_id;

	UNUSED(c);
	UNUSED(p);

	assert(!no_msr);

	for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
		unsigned long long msr_cur, msr_last;
		off_t offset;

		if (!idx_valid(i))
			continue;
		offset = idx_to_offset(i);
		if (offset < 0)
			continue;
		ret = get_msr(cpu, offset, &msr_cur);
		if (ret) {
			/* best effort: log and keep updating the remaining MSRs */
			fprintf(outf, "Can not update msr(0x%llx)\n", (unsigned long long)offset);
			continue;
		}

		msr_last = per_cpu_msr_sum[cpu].entries[i].last;
		per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff;

		DELTA_WRAP32(msr_cur, msr_last);	/* msr_last becomes the wrapped delta */
		per_cpu_msr_sum[cpu].entries[i].sum += msr_last;
	}
	return 0;
}
6282
/* SIGEV_THREAD notifier: refresh the wrap-tracking sums on every cpu */
static void msr_record_handler(union sigval v)
{
	UNUSED(v);

	for_all_cpus(update_msr_sum, EVEN_COUNTERS);
}
6289
msr_sum_record(void)6290 void msr_sum_record(void)
6291 {
6292 struct itimerspec its;
6293 struct sigevent sev;
6294
6295 per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array));
6296 if (!per_cpu_msr_sum) {
6297 fprintf(outf, "Can not allocate memory for long time MSR.\n");
6298 return;
6299 }
6300 /*
6301 * Signal handler might be restricted, so use thread notifier instead.
6302 */
6303 memset(&sev, 0, sizeof(struct sigevent));
6304 sev.sigev_notify = SIGEV_THREAD;
6305 sev.sigev_notify_function = msr_record_handler;
6306
6307 sev.sigev_value.sival_ptr = &timerid;
6308 if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) {
6309 fprintf(outf, "Can not create timer.\n");
6310 goto release_msr;
6311 }
6312
6313 its.it_value.tv_sec = 0;
6314 its.it_value.tv_nsec = 1;
6315 /*
6316 * A wraparound time has been calculated early.
6317 * Some sources state that the peak power for a
6318 * microprocessor is usually 1.5 times the TDP rating,
6319 * use 2 * TDP for safety.
6320 */
6321 its.it_interval.tv_sec = rapl_joule_counter_range / 2;
6322 its.it_interval.tv_nsec = 0;
6323
6324 if (timer_settime(timerid, 0, &its, NULL) == -1) {
6325 fprintf(outf, "Can not set timer.\n");
6326 goto release_timer;
6327 }
6328 return;
6329
6330 release_timer:
6331 timer_delete(timerid);
6332 release_msr:
6333 free(per_cpu_msr_sum);
6334 }
6335
6336 /*
6337 * set_my_sched_priority(pri)
6338 * return previous priority on success
6339 * return value < -20 on failure
6340 */
int set_my_sched_priority(int priority)
{
	int prev;

	/* getpriority() can legitimately return -1; disambiguate via errno */
	errno = 0;
	prev = getpriority(PRIO_PROCESS, 0);
	if (prev == -1 && errno)
		return -21;	/* could not read current priority */

	if (setpriority(PRIO_PROCESS, 0, priority) != 0)
		return -21;	/* likely insufficient permission */

	/* verify the kernel actually applied the requested priority */
	errno = 0;
	if (getpriority(PRIO_PROCESS, 0) != priority)
		return -21;

	return prev;	/* previous priority, so the caller may restore it */
}
6362
/*
 * turbostat_loop()
 * Main interval-mode loop.  Counters ping-pong between EVEN and ODD
 * buffers; each half-iteration sleeps one interval, snapshots the other
 * buffer, computes deltas against the previous one, and prints a row.
 * A -1 from get_counters() or any topology change triggers re_initialize()
 * and a restart of the measurement.
 */
void turbostat_loop()
{
	int retval;
	int restarted = 0;
	unsigned int done_iters = 0;

	setup_signal_handler();

	/*
	 * elevate own priority for interval mode
	 *
	 * ignore on error - we probably don't have permission to set it, but
	 * it's not a big deal
	 */
	set_my_sched_priority(-20);

restart:
	restarted++;

	/* initial (EVEN) snapshot that the first delta is computed against */
	snapshot_proc_sysfs_files();
	retval = for_all_cpus(get_counters, EVEN_COUNTERS);
	first_counter_read = 0;
	if (retval < -1) {
		exit(retval);
	} else if (retval == -1) {
		/* transient failure: re-probe topology, but give up after 10 tries */
		if (restarted > 10) {
			exit(retval);
		}
		re_initialize();
		goto restart;
	}
	restarted = 0;
	done_iters = 0;
	gettimeofday(&tv_even, (struct timezone *)NULL);

	while (1) {
		/* restart if cpus came/went, or the effective cpuset changed */
		if (for_all_proc_cpus(cpu_is_not_present)) {
			re_initialize();
			goto restart;
		}
		if (update_effective_str(false)) {
			re_initialize();
			goto restart;
		}
		do_sleep();
		if (snapshot_proc_sysfs_files())
			goto restart;
		/* ODD snapshot, delta vs EVEN, print */
		retval = for_all_cpus(get_counters, ODD_COUNTERS);
		if (retval < -1) {
			exit(retval);
		} else if (retval == -1) {
			re_initialize();
			goto restart;
		}
		gettimeofday(&tv_odd, (struct timezone *)NULL);
		timersub(&tv_odd, &tv_even, &tv_delta);
		if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
			re_initialize();
			goto restart;
		}
		delta_platform(&platform_counters_odd, &platform_counters_even);
		compute_average(EVEN_COUNTERS);
		format_all_counters(EVEN_COUNTERS);
		flush_output_stdout();
		if (exit_requested)
			break;
		if (num_iterations && ++done_iters >= num_iterations)
			break;
		do_sleep();
		if (snapshot_proc_sysfs_files())
			goto restart;
		/* EVEN snapshot, delta vs ODD, print */
		retval = for_all_cpus(get_counters, EVEN_COUNTERS);
		if (retval < -1) {
			exit(retval);
		} else if (retval == -1) {
			re_initialize();
			goto restart;
		}
		gettimeofday(&tv_even, (struct timezone *)NULL);
		timersub(&tv_even, &tv_odd, &tv_delta);
		if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
			re_initialize();
			goto restart;
		}
		delta_platform(&platform_counters_even, &platform_counters_odd);
		compute_average(ODD_COUNTERS);
		format_all_counters(ODD_COUNTERS);
		flush_output_stdout();
		if (exit_requested)
			break;
		if (num_iterations && ++done_iters >= num_iterations)
			break;
	}
}
6457
/*
 * check_dev_msr()
 * Verify /dev/cpu/N/msr exists, attempting to load the msr module if
 * not.  On failure, fall back to no-MSR mode rather than exiting.
 */
void check_dev_msr()
{
	struct stat sb;
	char pathname[32];

	if (no_msr)
		return;

	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
	if (stat(pathname, &sb))
		if (system("/sbin/modprobe msr > /dev/null 2>&1"))
			no_msr = 1;	/* module unavailable: disable MSR access */
}
6471
6472 /*
6473 * check for CAP_SYS_RAWIO
6474 * return 0 on success
6475 * return 1 on fail
6476 */
int check_for_cap_sys_rawio(void)
{
	cap_flag_value_t flag_value;
	cap_t caps;
	int failed = 0;

	caps = cap_get_proc();
	if (caps == NULL)
		return 1;	/* cannot even query our capabilities */

	/* CAP_SYS_RAWIO must be present in the effective set */
	if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &flag_value) != 0)
		failed = 1;
	else if (flag_value != CAP_SET)
		failed = 1;

	if (cap_free(caps) == -1)
		err(-6, "cap_free\n");

	return failed;
}
6503
/*
 * check_msr_permission()
 * Verify both CAP_SYS_RAWIO and read access to /dev/cpu/N/msr.
 * If either is missing, warn and fall back to no-MSR mode.
 */
void check_msr_permission(void)
{
	int failed = 0;
	char pathname[32];

	if (no_msr)
		return;

	/* check for CAP_SYS_RAWIO */
	failed += check_for_cap_sys_rawio();

	/* test file permissions */
	sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
	if (euidaccess(pathname, R_OK)) {
		failed++;
	}

	if (failed) {
		warnx("Failed to access %s. Some of the counters may not be available\n"
		      "\tRun as root to enable them or use %s to disable the access explicitly", pathname, "--no-msr");
		no_msr = 1;
	}
}
6527
/*
 * probe_bclk()
 * Determine the bus-clock frequency for this platform and derive
 * base_hz from the base ratio in MSR_PLATFORM_INFO[15:8].
 */
void probe_bclk(void)
{
	unsigned long long msr;
	unsigned int base_ratio;

	if (!platform->has_nhm_msrs || no_msr)
		return;

	if (platform->bclk_freq == BCLK_100MHZ)
		bclk = 100.00;
	else if (platform->bclk_freq == BCLK_133MHZ)
		bclk = 133.33;
	else if (platform->bclk_freq == BCLK_SLV)
		bclk = slm_bclk();	/* Silvermont: read bclk from MSR */
	else
		return;

	get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
	base_ratio = (msr >> 8) & 0xFF;

	base_hz = base_ratio * bclk * 1000000;
	has_base_hz = 1;

	if (platform->enable_tsc_tweak)
		tsc_tweak = base_hz / tsc_hz;	/* scale %Busy when TSC != base clock */
}
6554
/* strip every '_' from s, compacting the string in place */
static void remove_underbar(char *s)
{
	char *dst;

	for (dst = s; *s; s++) {
		if (*s != '_')
			*dst++ = *s;
	}

	*dst = '\0';
}
6567
/* dump every turbo-ratio-limit MSR variant this platform declares */
static void dump_turbo_ratio_info(void)
{
	if (!has_turbo)
		return;

	if (!platform->has_nhm_msrs || no_msr)
		return;

	if (platform->trl_msrs & TRL_LIMIT2)
		dump_turbo_ratio_limit2();

	if (platform->trl_msrs & TRL_LIMIT1)
		dump_turbo_ratio_limit1();

	if (platform->trl_msrs & TRL_BASE) {
		dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT);

		/* hybrid parts carry a second limit MSR for the other core type */
		if (is_hybrid)
			dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT);
	}

	if (platform->trl_msrs & TRL_ATOM)
		dump_atom_turbo_ratio_limits();

	if (platform->trl_msrs & TRL_KNL)
		dump_knl_turbo_ratio_limits();

	if (platform->has_config_tdp)
		dump_config_tdp();
}
6598
read_sysfs_int(char * path)6599 static int read_sysfs_int(char *path)
6600 {
6601 FILE *input;
6602 int retval = -1;
6603
6604 input = fopen(path, "r");
6605 if (input == NULL) {
6606 if (debug)
6607 fprintf(outf, "NSFOD %s\n", path);
6608 return (-1);
6609 }
6610 if (fscanf(input, "%d", &retval) != 1)
6611 err(1, "%s: failed to read int from file", path);
6612 fclose(input);
6613
6614 return (retval);
6615 }
6616
/*
 * dump_sysfs_file()
 * Print "<basename>: <first line>" of a sysfs file to outf.
 * Silently (unless debug) does nothing if the file is absent.
 */
static void dump_sysfs_file(char *path)
{
	FILE *input;
	char cpuidle_buf[64];

	input = fopen(path, "r");
	if (input == NULL) {
		if (debug)
			fprintf(outf, "NSFOD %s\n", path);
		return;
	}
	if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input))
		err(1, "%s: failed to read file", path);
	fclose(input);

	/* buf retains its newline from fgets, so no trailing \n here */
	fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
}
6634
/*
 * probe_intel_uncore_frequency_legacy()
 * Probe the old per-package/per-die intel_uncore_frequency sysfs layout
 * (package_XX_die_YY directories); enable the UncMHz column and, unless
 * quiet, print the limits and current frequency of each domain.
 */
static void probe_intel_uncore_frequency_legacy(void)
{
	int i, j;
	char path[256];

	for (i = 0; i < topo.num_packages; ++i) {
		for (j = 0; j <= topo.max_die_id; ++j) {
			int k, l;
			char path_base[128];

			sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i,
				j);

			if (access(path_base, R_OK))
				continue;

			BIC_PRESENT(BIC_UNCORE_MHZ);

			if (quiet)
				return;

			/* current min/max limits */
			sprintf(path, "%s/min_freq_khz", path_base);
			k = read_sysfs_int(path);
			sprintf(path, "%s/max_freq_khz", path_base);
			l = read_sysfs_int(path);
			fprintf(outf, "Uncore Frequency package%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);

			/* hardware default limits, shown in parentheses */
			sprintf(path, "%s/initial_min_freq_khz", path_base);
			k = read_sysfs_int(path);
			sprintf(path, "%s/initial_max_freq_khz", path_base);
			l = read_sysfs_int(path);
			fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);

			sprintf(path, "%s/current_freq_khz", path_base);
			k = read_sysfs_int(path);
			fprintf(outf, " %d MHz\n", k / 1000);
		}
	}
}
6674
/*
 * probe_intel_uncore_frequency_cluster()
 * Probe the newer per-cluster intel_uncore_frequency sysfs layout
 * (uncore00, uncore01, ...).  Register a UMHz counter per cluster and,
 * unless quiet, print each cluster's limits and current frequency.
 */
static void probe_intel_uncore_frequency_cluster(void)
{
	int i, uncore_max_id;
	char path[256];
	char path_base[128];

	if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK))
		return;

	for (uncore_max_id = 0;; ++uncore_max_id) {

		sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", uncore_max_id);

		/* uncore## start at 00 and skips no numbers, so stop upon first missing */
		if (access(path_base, R_OK)) {
			uncore_max_id -= 1;
			break;
		}
	}
	/* walk highest-to-lowest so counters are added in a stable order */
	for (i = uncore_max_id; i >= 0; --i) {
		int k, l;
		int package_id, domain_id, cluster_id;
		char name_buf[16];

		sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", i);

		/* directory existed a moment ago; vanishing now is fatal */
		if (access(path_base, R_OK))
			err(1, "%s: %s\n", __func__, path_base);

		sprintf(path, "%s/package_id", path_base);
		package_id = read_sysfs_int(path);

		sprintf(path, "%s/domain_id", path_base);
		domain_id = read_sysfs_int(path);

		sprintf(path, "%s/fabric_cluster_id", path_base);
		cluster_id = read_sysfs_int(path);

		sprintf(path, "%s/current_freq_khz", path_base);
		sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id);

		/* counter value is kHz; COUNTER_K2M scales it to MHz for display */
		add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id);

		if (quiet)
			continue;

		/* current min/max limits */
		sprintf(path, "%s/min_freq_khz", path_base);
		k = read_sysfs_int(path);
		sprintf(path, "%s/max_freq_khz", path_base);
		l = read_sysfs_int(path);
		fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id,
			cluster_id, k / 1000, l / 1000);

		/* hardware default limits, shown in parentheses */
		sprintf(path, "%s/initial_min_freq_khz", path_base);
		k = read_sysfs_int(path);
		sprintf(path, "%s/initial_max_freq_khz", path_base);
		l = read_sysfs_int(path);
		fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);

		sprintf(path, "%s/current_freq_khz", path_base);
		k = read_sysfs_int(path);
		fprintf(outf, " %d MHz\n", k / 1000);
	}
}
6739
probe_intel_uncore_frequency(void)6740 static void probe_intel_uncore_frequency(void)
6741 {
6742 if (!genuine_intel)
6743 return;
6744
6745 if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK) == 0)
6746 probe_intel_uncore_frequency_cluster();
6747 else
6748 probe_intel_uncore_frequency_legacy();
6749 }
6750
/* open the graphics sysfs knob at path (if readable) and cache the stream */
static void set_graphics_fp(char *path, int idx)
{
	if (!access(path, R_OK))
		gfx_info[idx].fp = fopen_or_die(path, "r");
}
6756
6757 /* Enlarge this if there are /sys/class/drm/card2 ... */
6758 #define GFX_MAX_CARDS 2
6759
/*
 * probe_graphics()
 * Locate graphics/SA-media sysfs knobs, trying in order: Xe driver
 * layout, new i915 per-gt layout, then legacy i915 files.  Cache an
 * open stream for each knob found and enable the matching columns.
 */
static void probe_graphics(void)
{
	char path[PATH_MAX];
	int i;

	/* Xe graphics sysfs knobs */
	if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) {
		FILE *fp;
		char buf[8];
		bool gt0_is_gt;

		/* gt0 may be either the render GT or the SA-media GT; its name tells */
		fp = fopen("/sys/class/drm/card0/device/tile0/gt0/gtidle/name", "r");
		if (!fp)
			goto next;

		if (!fread(buf, sizeof(char), 7, fp)) {
			fclose(fp);
			goto next;
		}
		fclose(fp);

		if (!strncmp(buf, "gt0-rc", strlen("gt0-rc")))
			gt0_is_gt = true;	/* gt0 = render, gt1 = SA media */
		else if (!strncmp(buf, "gt0-mc", strlen("gt0-mc")))
			gt0_is_gt = false;	/* gt0 = SA media, gt1 = render */
		else
			goto next;

		set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", gt0_is_gt ? GFX_rc6 : SAM_mc6);

		set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", gt0_is_gt ? GFX_MHz : SAM_MHz);

		set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz);

		set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", gt0_is_gt ? SAM_mc6 : GFX_rc6);

		set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", gt0_is_gt ? SAM_MHz : GFX_MHz);

		set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz);

		goto end;
	}

next:
	/* New i915 graphics sysfs knobs */
	for (i = 0; i < GFX_MAX_CARDS; i++) {
		snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rc6_residency_ms", i);
		if (!access(path, R_OK))
			break;	/* first card exposing the per-gt layout */
	}

	if (i == GFX_MAX_CARDS)
		goto legacy_i915;

	/* gt0 = render graphics, gt1 = SA media */
	snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rc6_residency_ms", i);
	set_graphics_fp(path, GFX_rc6);

	snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rps_cur_freq_mhz", i);
	set_graphics_fp(path, GFX_MHz);

	snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rps_act_freq_mhz", i);
	set_graphics_fp(path, GFX_ACTMHz);

	snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rc6_residency_ms", i);
	set_graphics_fp(path, SAM_mc6);

	snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rps_cur_freq_mhz", i);
	set_graphics_fp(path, SAM_MHz);

	snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rps_act_freq_mhz", i);
	set_graphics_fp(path, SAM_ACTMHz);

	goto end;

legacy_i915:
	/* Fall back to traditional i915 graphics sysfs knobs */
	set_graphics_fp("/sys/class/drm/card0/power/rc6_residency_ms", GFX_rc6);

	set_graphics_fp("/sys/class/drm/card0/gt_cur_freq_mhz", GFX_MHz);
	if (!gfx_info[GFX_MHz].fp)
		set_graphics_fp("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", GFX_MHz);

	set_graphics_fp("/sys/class/drm/card0/gt_act_freq_mhz", GFX_ACTMHz);
	if (!gfx_info[GFX_ACTMHz].fp)
		set_graphics_fp("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", GFX_ACTMHz);

end:
	/* enable output columns only for knobs that were actually found */
	if (gfx_info[GFX_rc6].fp)
		BIC_PRESENT(BIC_GFX_rc6);
	if (gfx_info[GFX_MHz].fp)
		BIC_PRESENT(BIC_GFXMHz);
	if (gfx_info[GFX_ACTMHz].fp)
		BIC_PRESENT(BIC_GFXACTMHz);
	if (gfx_info[SAM_mc6].fp)
		BIC_PRESENT(BIC_SAM_mc6);
	if (gfx_info[SAM_MHz].fp)
		BIC_PRESENT(BIC_SAMMHz);
	if (gfx_info[SAM_ACTMHz].fp)
		BIC_PRESENT(BIC_SAMACTMHz);
}
6860
/*
 * dump_sysfs_cstate_config()
 * Print the cpuidle driver/governor and, for each of base_cpu's idle
 * states, the (normalized) state name and its description.
 */
static void dump_sysfs_cstate_config(void)
{
	char path[64];
	char name_buf[16];
	char desc[64];
	FILE *input;
	int state;
	char *sp;

	if (access("/sys/devices/system/cpu/cpuidle", R_OK)) {
		fprintf(outf, "cpuidle not loaded\n");
		return;
	}

	dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver");
	dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor");
	dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro");

	/* 10 is an upper bound on states any current cpuidle driver exposes */
	for (state = 0; state < 10; ++state) {

		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
		input = fopen(path, "r");
		if (input == NULL)
			continue;
		if (!fgets(name_buf, sizeof(name_buf), input))
			err(1, "%s: failed to read file", path);

		/* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
		sp = strchr(name_buf, '-');
		if (!sp)
			sp = strchrnul(name_buf, '\n');
		*sp = '\0';
		fclose(input);

		remove_underbar(name_buf);

		sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", base_cpu, state);
		input = fopen(path, "r");
		if (input == NULL)
			continue;
		if (!fgets(desc, sizeof(desc), input))
			err(1, "%s: failed to read file", path);

		/* desc keeps its trailing newline from fgets */
		fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
		fclose(input);
	}
}
6908
dump_sysfs_pstate_config(void)6909 static void dump_sysfs_pstate_config(void)
6910 {
6911 char path[64];
6912 char driver_buf[64];
6913 char governor_buf[64];
6914 FILE *input;
6915 int turbo;
6916
6917 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", base_cpu);
6918 input = fopen(path, "r");
6919 if (input == NULL) {
6920 fprintf(outf, "NSFOD %s\n", path);
6921 return;
6922 }
6923 if (!fgets(driver_buf, sizeof(driver_buf), input))
6924 err(1, "%s: failed to read file", path);
6925 fclose(input);
6926
6927 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", base_cpu);
6928 input = fopen(path, "r");
6929 if (input == NULL) {
6930 fprintf(outf, "NSFOD %s\n", path);
6931 return;
6932 }
6933 if (!fgets(governor_buf, sizeof(governor_buf), input))
6934 err(1, "%s: failed to read file", path);
6935 fclose(input);
6936
6937 fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
6938 fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf);
6939
6940 sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
6941 input = fopen(path, "r");
6942 if (input != NULL) {
6943 if (fscanf(input, "%d", &turbo) != 1)
6944 err(1, "%s: failed to parse number from file", path);
6945 fprintf(outf, "cpufreq boost: %d\n", turbo);
6946 fclose(input);
6947 }
6948
6949 sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
6950 input = fopen(path, "r");
6951 if (input != NULL) {
6952 if (fscanf(input, "%d", &turbo) != 1)
6953 err(1, "%s: failed to parse number from file", path);
6954 fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
6955 fclose(input);
6956 }
6957 }
6958
6959 /*
6960 * print_epb()
6961 * Decode the ENERGY_PERF_BIAS MSR
6962 */
print_epb(struct thread_data * t,struct core_data * c,struct pkg_data * p)6963 int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
6964 {
6965 char *epb_string;
6966 int cpu, epb;
6967
6968 UNUSED(c);
6969 UNUSED(p);
6970
6971 if (!has_epb)
6972 return 0;
6973
6974 cpu = t->cpu_id;
6975
6976 /* EPB is per-package */
6977 if (!is_cpu_first_thread_in_package(t, c, p))
6978 return 0;
6979
6980 if (cpu_migrate(cpu)) {
6981 fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu);
6982 return -1;
6983 }
6984
6985 epb = get_epb(cpu);
6986 if (epb < 0)
6987 return 0;
6988
6989 switch (epb) {
6990 case ENERGY_PERF_BIAS_PERFORMANCE:
6991 epb_string = "performance";
6992 break;
6993 case ENERGY_PERF_BIAS_NORMAL:
6994 epb_string = "balanced";
6995 break;
6996 case ENERGY_PERF_BIAS_POWERSAVE:
6997 epb_string = "powersave";
6998 break;
6999 default:
7000 epb_string = "custom";
7001 break;
7002 }
7003 fprintf(outf, "cpu%d: EPB: %d (%s)\n", cpu, epb, epb_string);
7004
7005 return 0;
7006 }
7007
7008 /*
7009 * print_hwp()
7010 * Decode the MSR_HWP_CAPABILITIES
7011 */
/*
 * print_hwp()
 * Decode and print the HWP MSRs (PM_ENABLE, CAPABILITIES, REQUEST,
 * optionally REQUEST_PKG / INTERRUPT, and STATUS) once per package.
 * Returns 0 on success or skip, -1 if CPU migration fails.
 */
int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	int cpu;

	UNUSED(c);
	UNUSED(p);

	if (no_msr)
		return 0;

	if (!has_hwp)
		return 0;

	cpu = t->cpu_id;

	/* MSR_HWP_CAPABILITIES is per-package */
	if (!is_cpu_first_thread_in_package(t, c, p))
		return 0;

	if (cpu_migrate(cpu)) {
		fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	if (get_msr(cpu, MSR_PM_ENABLE, &msr))
		return 0;

	fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", cpu, msr, (msr & (1 << 0)) ? "" : "No-");

	/* MSR_PM_ENABLE[0] == 1 if HWP is enabled and MSRs visible */
	if ((msr & (1 << 0)) == 0)
		return 0;

	if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
		return 0;

	/* CAPABILITIES fields: bits 7:0 highest, 15:8 guaranteed, 23:16 most-efficient, 31:24 lowest perf */
	fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
		"(high %d guar %d eff %d low %d)\n",
		cpu, msr,
		(unsigned int)HWP_HIGHEST_PERF(msr),
		(unsigned int)HWP_GUARANTEED_PERF(msr),
		(unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr));

	if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
		return 0;

	/*
	 * REQUEST fields: min, max, desired, EPP, activity window, pkg-control.
	 * NOTE(review): the 0xff3 mask for the activity-window field looks like a
	 * typo for 0x3ff (the window is a 10-bit field at bits 41:32) -- confirm.
	 */
	fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
		"(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
		cpu, msr,
		(unsigned int)(((msr) >> 0) & 0xff),
		(unsigned int)(((msr) >> 8) & 0xff),
		(unsigned int)(((msr) >> 16) & 0xff),
		(unsigned int)(((msr) >> 24) & 0xff),
		(unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1));

	if (has_hwp_pkg) {
		if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
			return 0;

		/* same layout as MSR_HWP_REQUEST, minus the pkg-control bit */
		fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
			"(min %d max %d des %d epp 0x%x window 0x%x)\n",
			cpu, msr,
			(unsigned int)(((msr) >> 0) & 0xff),
			(unsigned int)(((msr) >> 8) & 0xff),
			(unsigned int)(((msr) >> 16) & 0xff),
			(unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3));
	}
	if (has_hwp_notify) {
		if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
			return 0;

		/* bit 0: guaranteed-perf-change interrupt enable; bit 1: excursion-to-minimum enable */
		fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
			"(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
			cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis");
	}
	if (get_msr(cpu, MSR_HWP_STATUS, &msr))
		return 0;

	/* bit 0: guaranteed-perf changed; bit 2: excursion-to-minimum occurred */
	fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
		"(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
		cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-");

	return 0;
}
7097
7098 /*
7099 * print_perf_limit()
7100 */
/*
 * print_perf_limit()
 * Decode and print the core / graphics / ring PERF_LIMIT_REASONS MSRs,
 * once per package.  For each MSR, the low 16 bits are the currently
 * Active throttle reasons and the next 16 bits are the sticky Logged
 * reasons (same bit meanings, shifted by 16).
 */
int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	int cpu;

	UNUSED(c);
	UNUSED(p);

	if (no_msr)
		return 0;

	cpu = t->cpu_id;

	/* per-package */
	if (!is_cpu_first_thread_in_package(t, c, p))
		return 0;

	if (cpu_migrate(cpu)) {
		fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	if (platform->plr_msrs & PLR_CORE) {
		get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
		fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
		/* Active reasons: bits 15..0 */
		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
			(msr & 1 << 15) ? "bit15, " : "",
			(msr & 1 << 14) ? "bit14, " : "",
			(msr & 1 << 13) ? "Transitions, " : "",
			(msr & 1 << 12) ? "MultiCoreTurbo, " : "",
			(msr & 1 << 11) ? "PkgPwrL2, " : "",
			(msr & 1 << 10) ? "PkgPwrL1, " : "",
			(msr & 1 << 9) ? "CorePwr, " : "",
			(msr & 1 << 8) ? "Amps, " : "",
			(msr & 1 << 6) ? "VR-Therm, " : "",
			(msr & 1 << 5) ? "Auto-HWP, " : "",
			(msr & 1 << 4) ? "Graphics, " : "",
			(msr & 1 << 2) ? "bit2, " : "",
			(msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : "");
		/* Logged (sticky) reasons: bits 31..16, same meanings as Active + 16 */
		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
			(msr & 1 << 31) ? "bit31, " : "",
			(msr & 1 << 30) ? "bit30, " : "",
			(msr & 1 << 29) ? "Transitions, " : "",
			(msr & 1 << 28) ? "MultiCoreTurbo, " : "",
			(msr & 1 << 27) ? "PkgPwrL2, " : "",
			(msr & 1 << 26) ? "PkgPwrL1, " : "",
			(msr & 1 << 25) ? "CorePwr, " : "",
			(msr & 1 << 24) ? "Amps, " : "",
			(msr & 1 << 22) ? "VR-Therm, " : "",
			(msr & 1 << 21) ? "Auto-HWP, " : "",
			(msr & 1 << 20) ? "Graphics, " : "",
			(msr & 1 << 18) ? "bit18, " : "",
			(msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : "");

	}
	if (platform->plr_msrs & PLR_GFX) {
		get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
		fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
		fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
			(msr & 1 << 0) ? "PROCHOT, " : "",
			(msr & 1 << 1) ? "ThermStatus, " : "",
			(msr & 1 << 4) ? "Graphics, " : "",
			(msr & 1 << 6) ? "VR-Therm, " : "",
			(msr & 1 << 8) ? "Amps, " : "",
			(msr & 1 << 9) ? "GFXPwr, " : "",
			(msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : "");
		fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
			(msr & 1 << 16) ? "PROCHOT, " : "",
			(msr & 1 << 17) ? "ThermStatus, " : "",
			(msr & 1 << 20) ? "Graphics, " : "",
			(msr & 1 << 22) ? "VR-Therm, " : "",
			(msr & 1 << 24) ? "Amps, " : "",
			(msr & 1 << 25) ? "GFXPwr, " : "",
			(msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
	}
	if (platform->plr_msrs & PLR_RING) {
		get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
		fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
		fprintf(outf, " (Active: %s%s%s%s%s%s)",
			(msr & 1 << 0) ? "PROCHOT, " : "",
			(msr & 1 << 1) ? "ThermStatus, " : "",
			(msr & 1 << 6) ? "VR-Therm, " : "",
			(msr & 1 << 8) ? "Amps, " : "",
			(msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : "");
		fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
			(msr & 1 << 16) ? "PROCHOT, " : "",
			(msr & 1 << 17) ? "ThermStatus, " : "",
			(msr & 1 << 22) ? "VR-Therm, " : "",
			(msr & 1 << 24) ? "Amps, " : "",
			(msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
	}
	return 0;
}
7194
7195 #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */
7196 #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */
7197
get_quirk_tdp(void)7198 double get_quirk_tdp(void)
7199 {
7200 if (platform->rapl_quirk_tdp)
7201 return platform->rapl_quirk_tdp;
7202
7203 return 135.0;
7204 }
7205
get_tdp_intel(void)7206 double get_tdp_intel(void)
7207 {
7208 unsigned long long msr;
7209
7210 if (platform->rapl_msrs & RAPL_PKG_POWER_INFO)
7211 if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
7212 return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
7213 return get_quirk_tdp();
7214 }
7215
/* AMD exposes no RAPL power-info MSR here, so use the quirk/default TDP. */
double get_tdp_amd(void)
{
	return get_quirk_tdp();
}
7220
/*
 * rapl_probe_intel()
 * Read MSR_RAPL_POWER_UNIT and derive the global RAPL scaling factors
 * (rapl_power_units, rapl_energy_units, rapl_dram_energy_units,
 * rapl_psys_energy_units, rapl_time_units) and the Joule-counter
 * wraparound range.  Also prunes either the Watts or the Joules
 * built-in-counter columns, depending on the --Joules option.
 */
void rapl_probe_intel(void)
{
	unsigned long long msr;
	unsigned int time_unit;
	double tdp;
	const unsigned long long bic_watt_bits = BIC_SysWatt | BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt;
	const unsigned long long bic_joules_bits = BIC_Sys_J | BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J;

	/* Watts and Joules columns are mutually exclusive; keep only the requested set */
	if (rapl_joules)
		bic_enabled &= ~bic_watt_bits;
	else
		bic_enabled &= ~bic_joules_bits;

	/* drop throttle-time columns for status MSRs this platform lacks */
	if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS))
		bic_enabled &= ~BIC_PKG__;
	if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS))
		bic_enabled &= ~BIC_RAM__;

	/* units on package 0, verify later other packages match */
	if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
		return;

	/* bits 3:0 — power units, in 1/2^n Watts */
	rapl_power_units = 1.0 / (1 << (msr & 0xF));
	/* bits 12:8 — energy units; some platforms encode a divisor instead of a shift */
	if (platform->has_rapl_divisor)
		rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
	else
		rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));

	/* some platforms use a fixed 15.3 uJ DRAM energy unit regardless of the MSR */
	if (platform->has_fixed_rapl_unit)
		rapl_dram_energy_units = (15.3 / 1000000);
	else
		rapl_dram_energy_units = rapl_energy_units;

	if (platform->has_fixed_rapl_psys_unit)
		rapl_psys_energy_units = 1.0;
	else
		rapl_psys_energy_units = rapl_energy_units;

	/* bits 19:16 — time units, in 1/2^n seconds; 0 means the default of 1/2^10 */
	time_unit = msr >> 16 & 0xF;
	if (time_unit == 0)
		time_unit = 0xA;

	rapl_time_units = 1.0 / (1 << (time_unit));

	tdp = get_tdp_intel();

	/* seconds until a 32-bit Joule counter wraps at full TDP */
	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
	if (!quiet)
		fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
}
7271
rapl_probe_amd(void)7272 void rapl_probe_amd(void)
7273 {
7274 unsigned long long msr;
7275 double tdp;
7276 const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt;
7277 const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J;
7278
7279 if (rapl_joules)
7280 bic_enabled &= ~bic_watt_bits;
7281 else
7282 bic_enabled &= ~bic_joules_bits;
7283
7284 if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
7285 return;
7286
7287 rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf));
7288 rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f));
7289 rapl_power_units = ldexp(1.0, -(msr & 0xf));
7290
7291 tdp = get_tdp_amd();
7292
7293 rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
7294 if (!quiet)
7295 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
7296 }
7297
print_power_limit_msr(int cpu,unsigned long long msr,char * label)7298 void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
7299 {
7300 fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n",
7301 cpu, label,
7302 ((msr >> 15) & 1) ? "EN" : "DIS",
7303 ((msr >> 0) & 0x7FFF) * rapl_power_units,
7304 (1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
7305 (((msr >> 16) & 1) ? "EN" : "DIS"));
7306
7307 return;
7308 }
7309
/*
 * print_rapl()
 * Dump the RAPL unit, power-info, power-limit and policy MSRs present
 * on this platform, once per package.
 * Returns 0 on success/skip, negative on migration or MSR-read failure.
 */
int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	const char *msr_name;
	int cpu;

	UNUSED(c);
	UNUSED(p);

	if (!platform->rapl_msrs)
		return 0;

	/* RAPL counters are per package, so print only for 1st thread/package */
	if (!is_cpu_first_thread_in_package(t, c, p))
		return 0;

	cpu = t->cpu_id;
	if (cpu_migrate(cpu)) {
		fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	/* AMD Family 17h uses its own power-unit MSR */
	if (platform->rapl_msrs & RAPL_AMD_F17H) {
		msr_name = "MSR_RAPL_PWR_UNIT";
		if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr))
			return -1;
	} else {
		msr_name = "MSR_RAPL_POWER_UNIT";
		if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
			return -1;
	}

	fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr,
		rapl_power_units, rapl_energy_units, rapl_time_units);

	if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) {

		if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
			return -5;

		/* fields: TDP (14:0), min power (30:16), max power (46:32), max time window (53:48) */
		fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
			cpu, msr,
			((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);

	}
	if (platform->rapl_msrs & RAPL_PKG) {

		if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
			return -9;

		fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
			cpu, msr, (msr >> 63) & 1 ? "" : "UN");

		/* Limit #1 lives in the low 32 bits, Limit #2 in bits 63:32 with the same layout */
		print_power_limit_msr(cpu, msr, "PKG Limit #1");
		fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n",
			cpu,
			((msr >> 47) & 1) ? "EN" : "DIS",
			((msr >> 32) & 0x7FFF) * rapl_power_units,
			(1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
			((msr >> 48) & 1) ? "EN" : "DIS");

		if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr))
			return -9;

		/* PKG Limit #4 (PL4) is the current-limit in MSR_VR_CURRENT_CONFIG bits 12:0 */
		fprintf(outf, "cpu%d: MSR_VR_CURRENT_CONFIG: 0x%08llx\n", cpu, msr);
		fprintf(outf, "cpu%d: PKG Limit #4: %f Watts (%slocked)\n",
			cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN");
	}

	if (platform->rapl_msrs & RAPL_DRAM_POWER_INFO) {
		if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
			return -6;

		fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
			cpu, msr,
			((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
	}
	if (platform->rapl_msrs & RAPL_DRAM) {
		if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
			return -9;
		fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
			cpu, msr, (msr >> 31) & 1 ? "" : "UN");

		print_power_limit_msr(cpu, msr, "DRAM Limit");
	}
	if (platform->rapl_msrs & RAPL_CORE_POLICY) {
		if (get_msr(cpu, MSR_PP0_POLICY, &msr))
			return -7;

		/* policy priority hint for the core (PP0) power plane, 0..15 */
		fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
	}
	if (platform->rapl_msrs & RAPL_CORE_POWER_LIMIT) {
		if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
			return -9;
		fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
			cpu, msr, (msr >> 31) & 1 ? "" : "UN");
		print_power_limit_msr(cpu, msr, "Cores Limit");
	}
	if (platform->rapl_msrs & RAPL_GFX) {
		if (get_msr(cpu, MSR_PP1_POLICY, &msr))
			return -8;

		/* policy priority hint for the graphics (PP1) power plane, 0..15 */
		fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);

		if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
			return -9;
		fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
			cpu, msr, (msr >> 31) & 1 ? "" : "UN");
		print_power_limit_msr(cpu, msr, "GFX Limit");
	}
	return 0;
}
7428
7429 /*
7430 * probe_rapl()
7431 *
7432 * sets rapl_power_units, rapl_energy_units, rapl_time_units
7433 */
probe_rapl(void)7434 void probe_rapl(void)
7435 {
7436 if (!platform->rapl_msrs || no_msr)
7437 return;
7438
7439 if (genuine_intel)
7440 rapl_probe_intel();
7441 if (authentic_amd || hygon_genuine)
7442 rapl_probe_amd();
7443
7444 if (quiet)
7445 return;
7446
7447 for_all_cpus(print_rapl, ODD_COUNTERS);
7448 }
7449
7450 /*
7451 * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
7452 * the Thermal Control Circuit (TCC) activates.
7453 * This is usually equal to tjMax.
7454 *
7455 * Older processors do not have this MSR, so there we guess,
7456 * but also allow cmdline over-ride with -T.
7457 *
7458 * Several MSR temperature values are in units of degrees-C
7459 * below this value, including the Digital Thermal Sensor (DTS),
7460 * Package Thermal Management Sensor (PTM), and thermal event thresholds.
7461 */
/*
 * set_temperature_target()
 * Establish tj_max for this package: from the -T command-line override,
 * from MSR_IA32_TEMPERATURE_TARGET, or by falling back to TJMAX_DEFAULT.
 * Runs once per package; always returns 0 except on migration failure.
 */
int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	unsigned int tcc_default, tcc_offset;
	int cpu;

	UNUSED(c);
	UNUSED(p);

	/* tj_max is used only for dts or ptm */
	if (!(do_dts || do_ptm))
		return 0;

	/* this is a per-package concept */
	if (!is_cpu_first_thread_in_package(t, c, p))
		return 0;

	cpu = t->cpu_id;
	if (cpu_migrate(cpu)) {
		fprintf(outf, "Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	/* the -T command-line override wins over everything */
	if (tj_max_override != 0) {
		tj_max = tj_max_override;
		fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", cpu, tj_max);
		return 0;
	}

	/* Temperature Target MSR is Nehalem and newer only */
	if (!platform->has_nhm_msrs || no_msr)
		goto guess;

	if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
		goto guess;

	/* bits 23:16 — TCC activation temperature in degrees C */
	tcc_default = (msr >> 16) & 0xFF;

	if (!quiet) {
		int bits = platform->tcc_offset_bits;
		unsigned long long enabled = 0;

		/* MSR_PLATFORM_INFO bit 30 gates whether the TCC offset is programmable */
		if (bits && !get_msr(base_cpu, MSR_PLATFORM_INFO, &enabled))
			enabled = (enabled >> 30) & 1;

		if (bits && enabled) {
			/* offset field starts at bit 24; width is platform-specific */
			tcc_offset = (msr >> 24) & GENMASK(bits - 1, 0);
			fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
				cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
		} else {
			fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default);
		}
	}

	/* an MSR reading of 0 is not credible — fall back to the default guess */
	if (!tcc_default)
		goto guess;

	tj_max = tcc_default;

	return 0;

guess:
	tj_max = TJMAX_DEFAULT;
	fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max);

	return 0;
}
7529
/*
 * print_thermal()
 * Decode and print the package (PTM) and per-core (DTS) thermal status
 * and interrupt-threshold MSRs.  Temperatures are reported as degrees C
 * below tj_max.  Runs once per core; package MSRs only on the first
 * core of each package; per-core MSRs only in --debug mode.
 */
int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
	unsigned long long msr;
	unsigned int dts, dts2;
	int cpu;

	UNUSED(c);
	UNUSED(p);

	if (no_msr)
		return 0;

	if (!(do_dts || do_ptm))
		return 0;

	cpu = t->cpu_id;

	/* DTS is per-core, no need to print for each thread */
	if (!is_cpu_first_thread_in_core(t, c, p))
		return 0;

	if (cpu_migrate(cpu)) {
		fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
		return -1;
	}

	if (do_ptm && is_cpu_first_core_in_package(t, c, p)) {
		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
			return 0;

		/* bits 22:16 — digital readout, degrees below tj_max */
		dts = (msr >> 16) & 0x7F;
		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts);

		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
			return 0;

		/* thermal interrupt thresholds #2 (22:16) and #1 (14:8) */
		dts = (msr >> 16) & 0x7F;
		dts2 = (msr >> 8) & 0x7F;
		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
			cpu, msr, tj_max - dts, tj_max - dts2);
	}

	if (do_dts && debug) {
		unsigned int resolution;

		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
			return 0;

		/* digital readout (22:16) and its resolution in degrees (30:27) */
		dts = (msr >> 16) & 0x7F;
		resolution = (msr >> 27) & 0xF;
		fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
			cpu, msr, tj_max - dts, resolution);

		if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
			return 0;

		/* thermal interrupt thresholds #2 (22:16) and #1 (14:8) */
		dts = (msr >> 16) & 0x7F;
		dts2 = (msr >> 8) & 0x7F;
		fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
			cpu, msr, tj_max - dts, tj_max - dts2);
	}

	return 0;
}
7594
probe_thermal(void)7595 void probe_thermal(void)
7596 {
7597 if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK))
7598 BIC_PRESENT(BIC_CORE_THROT_CNT);
7599 else
7600 BIC_NOT_PRESENT(BIC_CORE_THROT_CNT);
7601
7602 for_all_cpus(set_temperature_target, ODD_COUNTERS);
7603
7604 if (quiet)
7605 return;
7606
7607 for_all_cpus(print_thermal, ODD_COUNTERS);
7608 }
7609
get_cpu_type(struct thread_data * t,struct core_data * c,struct pkg_data * p)7610 int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p)
7611 {
7612 unsigned int eax, ebx, ecx, edx;
7613
7614 UNUSED(c);
7615 UNUSED(p);
7616
7617 if (!genuine_intel)
7618 return 0;
7619
7620 if (cpu_migrate(t->cpu_id)) {
7621 fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id);
7622 return -1;
7623 }
7624
7625 if (max_level < 0x1a)
7626 return 0;
7627
7628 __cpuid(0x1a, eax, ebx, ecx, edx);
7629 eax = (eax >> 24) & 0xFF;
7630 if (eax == 0x20)
7631 t->is_atom = true;
7632 return 0;
7633 }
7634
decode_feature_control_msr(void)7635 void decode_feature_control_msr(void)
7636 {
7637 unsigned long long msr;
7638
7639 if (no_msr)
7640 return;
7641
7642 if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr))
7643 fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
7644 base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : "");
7645 }
7646
decode_misc_enable_msr(void)7647 void decode_misc_enable_msr(void)
7648 {
7649 unsigned long long msr;
7650
7651 if (no_msr)
7652 return;
7653
7654 if (!genuine_intel)
7655 return;
7656
7657 if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
7658 fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
7659 base_cpu, msr,
7660 msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
7661 msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
7662 msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
7663 msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
7664 msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
7665 }
7666
decode_misc_feature_control(void)7667 void decode_misc_feature_control(void)
7668 {
7669 unsigned long long msr;
7670
7671 if (no_msr)
7672 return;
7673
7674 if (!platform->has_msr_misc_feature_control)
7675 return;
7676
7677 if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
7678 fprintf(outf,
7679 "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
7680 base_cpu, msr, msr & (0 << 0) ? "No-" : "", msr & (1 << 0) ? "No-" : "",
7681 msr & (2 << 0) ? "No-" : "", msr & (3 << 0) ? "No-" : "");
7682 }
7683
7684 /*
7685 * Decode MSR_MISC_PWR_MGMT
7686 *
7687 * Decode the bits according to the Nehalem documentation
7688 * bit[0] seems to continue to have same meaning going forward
7689 * bit[1] less so...
7690 */
decode_misc_pwr_mgmt_msr(void)7691 void decode_misc_pwr_mgmt_msr(void)
7692 {
7693 unsigned long long msr;
7694
7695 if (no_msr)
7696 return;
7697
7698 if (!platform->has_msr_misc_pwr_mgmt)
7699 return;
7700
7701 if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
7702 fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
7703 base_cpu, msr,
7704 msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS");
7705 }
7706
7707 /*
7708 * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
7709 *
7710 * This MSRs are present on Silvermont processors,
7711 * Intel Atom processor E3000 series (Baytrail), and friends.
7712 */
decode_c6_demotion_policy_msr(void)7713 void decode_c6_demotion_policy_msr(void)
7714 {
7715 unsigned long long msr;
7716
7717 if (no_msr)
7718 return;
7719
7720 if (!platform->has_msr_c6_demotion_policy_config)
7721 return;
7722
7723 if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
7724 fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
7725 base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
7726
7727 if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr))
7728 fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
7729 base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
7730 }
7731
print_dev_latency(void)7732 void print_dev_latency(void)
7733 {
7734 char *path = "/dev/cpu_dma_latency";
7735 int fd;
7736 int value;
7737 int retval;
7738
7739 fd = open(path, O_RDONLY);
7740 if (fd < 0) {
7741 if (debug)
7742 warnx("Read %s failed", path);
7743 return;
7744 }
7745
7746 retval = read(fd, (void *)&value, sizeof(int));
7747 if (retval != sizeof(int)) {
7748 warn("read failed %s", path);
7749 close(fd);
7750 return;
7751 }
7752 fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", value, value == 2000000000 ? "default" : "constrained");
7753
7754 close(fd);
7755 }
7756
has_instr_count_access(void)7757 static int has_instr_count_access(void)
7758 {
7759 int fd;
7760 int has_access;
7761
7762 if (no_perf)
7763 return 0;
7764
7765 fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
7766 has_access = fd != -1;
7767
7768 if (fd != -1)
7769 close(fd);
7770
7771 if (!has_access)
7772 warnx("Failed to access %s. Some of the counters may not be available\n"
7773 "\tRun as root to enable them or use %s to disable the access explicitly",
7774 "instructions retired perf counter", "--no-perf");
7775
7776 return has_access;
7777 }
7778
/*
 * add_rapl_perf_counter_()
 * Open one RAPL energy counter via the perf subsystem for @cpu,
 * attaching it to the domain's perf group (rci->fd_perf).
 * On success returns the new fd and stores the counter's scale and
 * unit through @scale_ / @unit_; returns -1 on any failure.
 */
int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
			   double *scale_, enum rapl_unit *unit_)
{
	if (no_perf)
		return -1;

	/* a zero scale means the sysfs .scale attribute was absent/unreadable */
	const double scale = read_perf_scale(cai->perf_subsys, cai->perf_name);

	if (scale == 0.0)
		return -1;

	const enum rapl_unit unit = read_perf_rapl_unit(cai->perf_subsys, cai->perf_name);

	if (unit == RAPL_UNIT_INVALID)
		return -1;

	const unsigned int rapl_type = read_perf_type(cai->perf_subsys);
	const unsigned int rapl_energy_pkg_config = read_perf_config(cai->perf_subsys, cai->perf_name);

	/* join the domain's existing perf group (rci->fd_perf == -1 means no group yet) */
	const int fd_counter =
	    open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP);
	if (fd_counter == -1)
		return -1;

	/* If it's the first counter opened, make it a group descriptor */
	if (rci->fd_perf == -1)
		rci->fd_perf = fd_counter;

	*scale_ = scale;
	*unit_ = unit;
	return fd_counter;
}
7811
add_rapl_perf_counter(int cpu,struct rapl_counter_info_t * rci,const struct rapl_counter_arch_info * cai,double * scale,enum rapl_unit * unit)7812 int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
7813 double *scale, enum rapl_unit *unit)
7814 {
7815 int ret = add_rapl_perf_counter_(cpu, rci, cai, scale, unit);
7816
7817 if (debug >= 2)
7818 fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu);
7819
7820 return ret;
7821 }
7822
7823 /*
7824 * Linux-perf manages the HW instructions-retired counter
7825 * by enabling when requested, and hiding rollover
7826 */
linux_perf_init(void)7827 void linux_perf_init(void)
7828 {
7829 if (access("/proc/sys/kernel/perf_event_paranoid", F_OK))
7830 return;
7831
7832 if (BIC_IS_ENABLED(BIC_IPC) && has_aperf) {
7833 fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
7834 if (fd_instr_count_percpu == NULL)
7835 err(-1, "calloc fd_instr_count_percpu");
7836 }
7837 }
7838
rapl_perf_init(void)7839 void rapl_perf_init(void)
7840 {
7841 const unsigned int num_domains = (platform->has_per_core_rapl ? topo.max_core_id : topo.max_package_id) + 1;
7842 bool *domain_visited = calloc(num_domains, sizeof(bool));
7843
7844 rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain));
7845 if (rapl_counter_info_perdomain == NULL)
7846 err(-1, "calloc rapl_counter_info_percpu");
7847 rapl_counter_info_perdomain_size = num_domains;
7848
7849 /*
7850 * Initialize rapl_counter_info_percpu
7851 */
7852 for (unsigned int domain_id = 0; domain_id < num_domains; ++domain_id) {
7853 struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain_id];
7854
7855 rci->fd_perf = -1;
7856 for (size_t i = 0; i < NUM_RAPL_COUNTERS; ++i) {
7857 rci->data[i] = 0;
7858 rci->source[i] = COUNTER_SOURCE_NONE;
7859 }
7860 }
7861
7862 /*
7863 * Open/probe the counters
7864 * If can't get it via perf, fallback to MSR
7865 */
7866 for (size_t i = 0; i < ARRAY_SIZE(rapl_counter_arch_infos); ++i) {
7867
7868 const struct rapl_counter_arch_info *const cai = &rapl_counter_arch_infos[i];
7869 bool has_counter = 0;
7870 double scale;
7871 enum rapl_unit unit;
7872 unsigned int next_domain;
7873
7874 memset(domain_visited, 0, num_domains * sizeof(*domain_visited));
7875
7876 for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) {
7877
7878 if (cpu_is_not_allowed(cpu))
7879 continue;
7880
7881 /* Skip already seen and handled RAPL domains */
7882 next_domain =
7883 platform->has_per_core_rapl ? cpus[cpu].physical_core_id : cpus[cpu].physical_package_id;
7884
7885 assert(next_domain < num_domains);
7886
7887 if (domain_visited[next_domain])
7888 continue;
7889
7890 domain_visited[next_domain] = 1;
7891
7892 if ((cai->flags & RAPL_COUNTER_FLAG_PLATFORM_COUNTER) && (cpu != base_cpu))
7893 continue;
7894
7895 struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain];
7896
7897 /* Check if the counter is enabled and accessible */
7898 if (BIC_IS_ENABLED(cai->bic) && (platform->rapl_msrs & cai->feature_mask)) {
7899
7900 /* Use perf API for this counter */
7901 if (!no_perf && cai->perf_name
7902 && add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) {
7903 rci->source[cai->rci_index] = COUNTER_SOURCE_PERF;
7904 rci->scale[cai->rci_index] = scale * cai->compat_scale;
7905 rci->unit[cai->rci_index] = unit;
7906 rci->flags[cai->rci_index] = cai->flags;
7907
7908 /* Use MSR for this counter */
7909 } else if (!no_msr && cai->msr && probe_rapl_msr(cpu, cai->msr, cai->rci_index) == 0) {
7910 rci->source[cai->rci_index] = COUNTER_SOURCE_MSR;
7911 rci->msr[cai->rci_index] = cai->msr;
7912 rci->msr_mask[cai->rci_index] = cai->msr_mask;
7913 rci->msr_shift[cai->rci_index] = cai->msr_shift;
7914 rci->unit[cai->rci_index] = RAPL_UNIT_JOULES;
7915 rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale;
7916 rci->flags[cai->rci_index] = cai->flags;
7917 }
7918 }
7919
7920 if (rci->source[cai->rci_index] != COUNTER_SOURCE_NONE)
7921 has_counter = 1;
7922 }
7923
7924 /* If any CPU has access to the counter, make it present */
7925 if (has_counter)
7926 BIC_PRESENT(cai->bic);
7927 }
7928
7929 free(domain_visited);
7930 }
7931
7932 /* Assumes msr_counter_info is populated */
has_amperf_access(void)7933 static int has_amperf_access(void)
7934 {
7935 return msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].present &&
7936 msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].present;
7937 }
7938
get_cstate_perf_group_fd(struct cstate_counter_info_t * cci,const char * group_name)7939 int *get_cstate_perf_group_fd(struct cstate_counter_info_t *cci, const char *group_name)
7940 {
7941 if (strcmp(group_name, "cstate_core") == 0)
7942 return &cci->fd_perf_core;
7943
7944 if (strcmp(group_name, "cstate_pkg") == 0)
7945 return &cci->fd_perf_pkg;
7946
7947 return NULL;
7948 }
7949
add_cstate_perf_counter_(int cpu,struct cstate_counter_info_t * cci,const struct cstate_counter_arch_info * cai)7950 int add_cstate_perf_counter_(int cpu, struct cstate_counter_info_t *cci, const struct cstate_counter_arch_info *cai)
7951 {
7952 if (no_perf)
7953 return -1;
7954
7955 int *pfd_group = get_cstate_perf_group_fd(cci, cai->perf_subsys);
7956
7957 if (pfd_group == NULL)
7958 return -1;
7959
7960 const unsigned int type = read_perf_type(cai->perf_subsys);
7961 const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name);
7962
7963 const int fd_counter = open_perf_counter(cpu, type, config, *pfd_group, PERF_FORMAT_GROUP);
7964
7965 if (fd_counter == -1)
7966 return -1;
7967
7968 /* If it's the first counter opened, make it a group descriptor */
7969 if (*pfd_group == -1)
7970 *pfd_group = fd_counter;
7971
7972 return fd_counter;
7973 }
7974
add_cstate_perf_counter(int cpu,struct cstate_counter_info_t * cci,const struct cstate_counter_arch_info * cai)7975 int add_cstate_perf_counter(int cpu, struct cstate_counter_info_t *cci, const struct cstate_counter_arch_info *cai)
7976 {
7977 int ret = add_cstate_perf_counter_(cpu, cci, cai);
7978
7979 if (debug >= 2)
7980 fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu);
7981
7982 return ret;
7983 }
7984
add_msr_perf_counter_(int cpu,struct msr_counter_info_t * cci,const struct msr_counter_arch_info * cai)7985 int add_msr_perf_counter_(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai)
7986 {
7987 if (no_perf)
7988 return -1;
7989
7990 const unsigned int type = read_perf_type(cai->perf_subsys);
7991 const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name);
7992
7993 const int fd_counter = open_perf_counter(cpu, type, config, cci->fd_perf, PERF_FORMAT_GROUP);
7994
7995 if (fd_counter == -1)
7996 return -1;
7997
7998 /* If it's the first counter opened, make it a group descriptor */
7999 if (cci->fd_perf == -1)
8000 cci->fd_perf = fd_counter;
8001
8002 return fd_counter;
8003 }
8004
add_msr_perf_counter(int cpu,struct msr_counter_info_t * cci,const struct msr_counter_arch_info * cai)8005 int add_msr_perf_counter(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai)
8006 {
8007 int ret = add_msr_perf_counter_(cpu, cci, cai);
8008
8009 if (debug)
8010 fprintf(stderr, "%s: %s/%s: %d (cpu: %d)\n", __func__, cai->perf_subsys, cai->perf_name, ret, cpu);
8011
8012 return ret;
8013 }
8014
/*
 * Allocate per-cpu MSR counter state and probe each needed counter
 * (APERF/MPERF/SMI) on every allowed CPU: perf first, MSR fallback.
 * Sets cai->present when at least one CPU can read the counter.
 */
void msr_perf_init_(void)
{
	const int mci_num = topo.max_cpu_num + 1;

	msr_counter_info = calloc(mci_num, sizeof(*msr_counter_info));
	if (!msr_counter_info)
		err(1, "calloc msr_counter_info");
	msr_counter_info_size = mci_num;

	/* -1 marks "no perf group leader opened yet" for each cpu */
	for (int cpu = 0; cpu < mci_num; ++cpu)
		msr_counter_info[cpu].fd_perf = -1;

	for (int cidx = 0; cidx < NUM_MSR_COUNTERS; ++cidx) {

		struct msr_counter_arch_info *cai = &msr_counter_arch_infos[cidx];

		cai->present = false;

		for (int cpu = 0; cpu < mci_num; ++cpu) {

			struct msr_counter_info_t *const cci = &msr_counter_info[cpu];

			if (cpu_is_not_allowed(cpu))
				continue;

			if (cai->needed) {
				/* Use perf API for this counter */
				if (!no_perf && cai->perf_name && add_msr_perf_counter(cpu, cci, cai) != -1) {
					cci->source[cai->rci_index] = COUNTER_SOURCE_PERF;
					cai->present = true;

					/* Use MSR for this counter */
					/* NOTE(review): probe_rapl_msr() is used here as a
					 * generic MSR read-probe despite its name — confirm */
				} else if (!no_msr && cai->msr && probe_rapl_msr(cpu, cai->msr, cai->rci_index) == 0) {
					cci->source[cai->rci_index] = COUNTER_SOURCE_MSR;
					cci->msr[cai->rci_index] = cai->msr;
					cci->msr_mask[cai->rci_index] = cai->msr_mask;
					cai->present = true;
				}
			}
		}
	}
}
8057
8058 /* Initialize data for reading perf counters from the MSR group. */
msr_perf_init(void)8059 void msr_perf_init(void)
8060 {
8061 bool need_amperf = false, need_smi = false;
8062 const bool need_soft_c1 = (!platform->has_msr_core_c1_res) && (platform->supported_cstates & CC1);
8063
8064 need_amperf = BIC_IS_ENABLED(BIC_Avg_MHz) || BIC_IS_ENABLED(BIC_Busy) || BIC_IS_ENABLED(BIC_Bzy_MHz)
8065 || BIC_IS_ENABLED(BIC_IPC) || need_soft_c1;
8066
8067 if (BIC_IS_ENABLED(BIC_SMI))
8068 need_smi = true;
8069
8070 /* Enable needed counters */
8071 msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].needed = need_amperf;
8072 msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].needed = need_amperf;
8073 msr_counter_arch_infos[MSR_ARCH_INFO_SMI_INDEX].needed = need_smi;
8074
8075 msr_perf_init_();
8076
8077 const bool has_amperf = has_amperf_access();
8078 const bool has_smi = msr_counter_arch_infos[MSR_ARCH_INFO_SMI_INDEX].present;
8079
8080 has_aperf_access = has_amperf;
8081
8082 if (has_amperf) {
8083 BIC_PRESENT(BIC_Avg_MHz);
8084 BIC_PRESENT(BIC_Busy);
8085 BIC_PRESENT(BIC_Bzy_MHz);
8086 BIC_PRESENT(BIC_SMI);
8087 }
8088
8089 if (has_smi)
8090 BIC_PRESENT(BIC_SMI);
8091 }
8092
/*
 * Probe and open C-state residency counters on every allowed CPU:
 * perf API first, MSR fallback.  A counter is opened once per thread,
 * per core, or per package, depending on its collection-scope flags.
 *
 * @soft_c1: also open counters the software C1 estimate depends on
 *           (CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY).
 */
void cstate_perf_init_(bool soft_c1)
{
	bool has_counter;
	bool *cores_visited = NULL, *pkg_visited = NULL;
	const int cores_visited_elems = topo.max_core_id + 1;
	const int pkg_visited_elems = topo.max_package_id + 1;
	const int cci_num = topo.max_cpu_num + 1;

	ccstate_counter_info = calloc(cci_num, sizeof(*ccstate_counter_info));
	if (!ccstate_counter_info)
		err(1, "calloc ccstate_counter_arch_info");
	ccstate_counter_info_size = cci_num;

	cores_visited = calloc(cores_visited_elems, sizeof(*cores_visited));
	if (!cores_visited)
		err(1, "calloc cores_visited");

	pkg_visited = calloc(pkg_visited_elems, sizeof(*pkg_visited));
	if (!pkg_visited)
		err(1, "calloc pkg_visited");

	/* Initialize cstate_counter_info_percpu */
	for (int cpu = 0; cpu < cci_num; ++cpu) {
		/* -1 marks "no perf group leader opened yet" */
		ccstate_counter_info[cpu].fd_perf_core = -1;
		ccstate_counter_info[cpu].fd_perf_pkg = -1;
	}

	for (int cidx = 0; cidx < NUM_CSTATE_COUNTERS; ++cidx) {
		has_counter = false;
		/* visit tracking resets for each counter in the table */
		memset(cores_visited, 0, cores_visited_elems * sizeof(*cores_visited));
		memset(pkg_visited, 0, pkg_visited_elems * sizeof(*pkg_visited));

		const struct cstate_counter_arch_info *cai = &ccstate_counter_arch_infos[cidx];

		for (int cpu = 0; cpu < cci_num; ++cpu) {

			struct cstate_counter_info_t *const cci = &ccstate_counter_info[cpu];

			if (cpu_is_not_allowed(cpu))
				continue;

			const int core_id = cpus[cpu].physical_core_id;
			const int pkg_id = cpus[cpu].physical_package_id;

			assert(core_id < cores_visited_elems);
			assert(pkg_id < pkg_visited_elems);

			const bool per_thread = cai->flags & CSTATE_COUNTER_FLAG_COLLECT_PER_THREAD;
			const bool per_core = cai->flags & CSTATE_COUNTER_FLAG_COLLECT_PER_CORE;

			/* open at most once per core / per package unless the
			 * counter is collected per thread / per core */
			if (!per_thread && cores_visited[core_id])
				continue;

			if (!per_core && pkg_visited[pkg_id])
				continue;

			const bool counter_needed = BIC_IS_ENABLED(cai->bic) ||
			    (soft_c1 && (cai->flags & CSTATE_COUNTER_FLAG_SOFT_C1_DEPENDENCY));
			const bool counter_supported = (platform->supported_cstates & cai->feature_mask);

			if (counter_needed && counter_supported) {
				/* Use perf API for this counter */
				if (!no_perf && cai->perf_name && add_cstate_perf_counter(cpu, cci, cai) != -1) {

					cci->source[cai->rci_index] = COUNTER_SOURCE_PERF;

					/* Use MSR for this counter */
					/* NOTE(review): probe_rapl_msr() used as a generic
					 * MSR read-probe despite its name — confirm */
				} else if (!no_msr && cai->msr && pkg_cstate_limit >= cai->pkg_cstate_limit
					   && probe_rapl_msr(cpu, cai->msr, cai->rci_index) == 0) {
					cci->source[cai->rci_index] = COUNTER_SOURCE_MSR;
					cci->msr[cai->rci_index] = cai->msr;
				}
			}

			/* mark scope visited only when the counter was obtained */
			if (cci->source[cai->rci_index] != COUNTER_SOURCE_NONE) {
				has_counter = true;
				cores_visited[core_id] = true;
				pkg_visited[pkg_id] = true;
			}
		}

		/* If any CPU has access to the counter, make it present */
		if (has_counter)
			BIC_PRESENT(cai->bic);
	}

	free(cores_visited);
	free(pkg_visited);
}
8182
cstate_perf_init(void)8183 void cstate_perf_init(void)
8184 {
8185 /*
8186 * If we don't have a C1 residency MSR, we calculate it "in software",
8187 * but we need APERF, MPERF too.
8188 */
8189 const bool soft_c1 = !platform->has_msr_core_c1_res && has_amperf_access()
8190 && platform->supported_cstates & CC1;
8191
8192 if (soft_c1)
8193 BIC_PRESENT(BIC_CPU_c1);
8194
8195 cstate_perf_init_(soft_c1);
8196 }
8197
probe_cstates(void)8198 void probe_cstates(void)
8199 {
8200 probe_cst_limit();
8201
8202 if (platform->has_msr_module_c6_res_ms)
8203 BIC_PRESENT(BIC_Mod_c6);
8204
8205 if (platform->has_ext_cst_msrs && !no_msr) {
8206 BIC_PRESENT(BIC_Totl_c0);
8207 BIC_PRESENT(BIC_Any_c0);
8208 BIC_PRESENT(BIC_GFX_c0);
8209 BIC_PRESENT(BIC_CPUGFX);
8210 }
8211
8212 if (quiet)
8213 return;
8214
8215 dump_power_ctl();
8216 dump_cst_cfg();
8217 decode_c6_demotion_policy_msr();
8218 print_dev_latency();
8219 dump_sysfs_cstate_config();
8220 print_irtl();
8221 }
8222
probe_lpi(void)8223 void probe_lpi(void)
8224 {
8225 if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
8226 BIC_PRESENT(BIC_CPU_LPI);
8227 else
8228 BIC_NOT_PRESENT(BIC_CPU_LPI);
8229
8230 if (!access(sys_lpi_file_sysfs, R_OK)) {
8231 sys_lpi_file = sys_lpi_file_sysfs;
8232 BIC_PRESENT(BIC_SYS_LPI);
8233 } else if (!access(sys_lpi_file_debugfs, R_OK)) {
8234 sys_lpi_file = sys_lpi_file_debugfs;
8235 BIC_PRESENT(BIC_SYS_LPI);
8236 } else {
8237 sys_lpi_file_sysfs = NULL;
8238 BIC_NOT_PRESENT(BIC_SYS_LPI);
8239 }
8240
8241 }
8242
probe_pstates(void)8243 void probe_pstates(void)
8244 {
8245 probe_bclk();
8246
8247 if (quiet)
8248 return;
8249
8250 dump_platform_info();
8251 dump_turbo_ratio_info();
8252 dump_sysfs_pstate_config();
8253 decode_misc_pwr_mgmt_msr();
8254
8255 for_all_cpus(print_hwp, ODD_COUNTERS);
8256 for_all_cpus(print_epb, ODD_COUNTERS);
8257 for_all_cpus(print_perf_limit, ODD_COUNTERS);
8258 }
8259
/*
 * Identify the CPU via CPUID: vendor, family/model/stepping, feature
 * flags, TSC/crystal frequencies — and set the corresponding globals
 * and built-in-counter presence bits.
 */
void process_cpuid()
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
	unsigned long long ucode_patch = 0;
	bool ucode_patch_valid = false;

	eax = ebx = ecx = edx = 0;

	__cpuid(0, max_level, ebx, ecx, edx);

	/* vendor string is returned in EBX:EDX:ECX ("GenuineIntel" etc.) */
	if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
		genuine_intel = 1;
	else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
		authentic_amd = 1;
	else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e)
		hygon_genuine = 1;

	if (!quiet)
		fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n",
			(char *)&ebx, (char *)&edx, (char *)&ecx, max_level);

	__cpuid(1, fms, ebx, ecx, edx);
	family = (fms >> 8) & 0xf;
	model = (fms >> 4) & 0xf;
	stepping = fms & 0xf;
	/* extended family/model fields per the x86 CPUID convention */
	if (family == 0xf)
		family += (fms >> 20) & 0xff;
	if (family >= 6)
		model += ((fms >> 16) & 0xf) << 4;
	ecx_flags = ecx;
	edx_flags = edx;

	if (!no_msr) {
		if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
			warnx("get_msr(UCODE)");
		else
			ucode_patch_valid = true;
	}

	/*
	 * check max extended function levels of CPUID.
	 * This is needed to check for invariant TSC.
	 * This check is valid for both Intel and AMD.
	 */
	ebx = ecx = edx = 0;
	__cpuid(0x80000000, max_extended_level, ebx, ecx, edx);

	if (!quiet) {
		fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)",
			family, model, stepping, family, model, stepping);
		if (ucode_patch_valid)
			/* microcode revision lives in the upper 32 bits */
			fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
		fputc('\n', outf);

		fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level);
		fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
			ecx_flags & (1 << 0) ? "SSE3" : "-",
			ecx_flags & (1 << 3) ? "MONITOR" : "-",
			ecx_flags & (1 << 6) ? "SMX" : "-",
			ecx_flags & (1 << 7) ? "EIST" : "-",
			ecx_flags & (1 << 8) ? "TM2" : "-",
			edx_flags & (1 << 4) ? "TSC" : "-",
			edx_flags & (1 << 5) ? "MSR" : "-",
			edx_flags & (1 << 22) ? "ACPI-TM" : "-",
			edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-");
	}

	probe_platform_features(family, model);

	/* turbostat requires MSR support to do anything useful */
	if (!(edx_flags & (1 << 5)))
		errx(1, "CPUID: no MSR");

	if (max_extended_level >= 0x80000007) {

		/*
		 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
		 * this check is valid for both Intel and AMD
		 */
		__cpuid(0x80000007, eax, ebx, ecx, edx);
		has_invariant_tsc = edx & (1 << 8);
	}

	/*
	 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
	 * this check is valid for both Intel and AMD
	 */

	__cpuid(0x6, eax, ebx, ecx, edx);
	has_aperf = ecx & (1 << 0);
	do_dts = eax & (1 << 0);
	if (do_dts)
		BIC_PRESENT(BIC_CoreTmp);
	has_turbo = eax & (1 << 1);
	do_ptm = eax & (1 << 6);
	if (do_ptm)
		BIC_PRESENT(BIC_PkgTmp);
	has_hwp = eax & (1 << 7);
	has_hwp_notify = eax & (1 << 8);
	has_hwp_activity_window = eax & (1 << 9);
	has_hwp_epp = eax & (1 << 10);
	has_hwp_pkg = eax & (1 << 11);
	has_epb = ecx & (1 << 3);

	if (!quiet)
		fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
			"%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
			has_aperf ? "" : "No-",
			has_turbo ? "" : "No-",
			do_dts ? "" : "No-",
			do_ptm ? "" : "No-",
			has_hwp ? "" : "No-",
			has_hwp_notify ? "" : "No-",
			has_hwp_activity_window ? "" : "No-",
			has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-");

	if (!quiet)
		decode_misc_enable_msr();

	if (max_level >= 0x7 && !quiet) {
		int has_sgx;

		ecx = 0;

		__cpuid_count(0x7, 0, eax, ebx, ecx, edx);

		has_sgx = ebx & (1 << 2);

		is_hybrid = edx & (1 << 15);

		fprintf(outf, "CPUID(7): %sSGX %sHybrid\n", has_sgx ? "" : "No-", is_hybrid ? "" : "No-");

		if (has_sgx)
			decode_feature_control_msr();
	}

	if (max_level >= 0x15) {
		unsigned int eax_crystal;
		unsigned int ebx_tsc;

		/*
		 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
		 */
		eax_crystal = ebx_tsc = crystal_hz = edx = 0;
		__cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);

		if (ebx_tsc != 0) {
			/* NOTE(review): 'ebx' here is stale — last written by an
			 * earlier CPUID leaf; the guard likely meant 'ebx_tsc'.
			 * Verify before changing. */
			if (!quiet && (ebx != 0))
				fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
					eax_crystal, ebx_tsc, crystal_hz);

			/* fall back to a per-platform table when CPUID omits it */
			if (crystal_hz == 0)
				crystal_hz = platform->crystal_freq;

			if (crystal_hz) {
				/* TSC Hz = crystal Hz * (TSC/crystal ratio) */
				tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal;
				if (!quiet)
					fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
						tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal);
			}
		}
	}
	if (max_level >= 0x16) {
		unsigned int base_mhz, max_mhz, bus_mhz, edx;

		/*
		 * CPUID 16H Base MHz, Max MHz, Bus MHz
		 */
		base_mhz = max_mhz = bus_mhz = edx = 0;

		__cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);

		bclk = bus_mhz;

		base_hz = base_mhz * 1000000;
		has_base_hz = 1;

		if (platform->enable_tsc_tweak)
			tsc_tweak = base_hz / tsc_hz;

		if (!quiet)
			fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
				base_mhz, max_mhz, bus_mhz);
	}

	if (has_aperf)
		aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1;

	BIC_PRESENT(BIC_IRQ);
	BIC_PRESENT(BIC_NMI);
	BIC_PRESENT(BIC_TSC_MHz);
}
8452
counter_info_init(void)8453 static void counter_info_init(void)
8454 {
8455 for (int i = 0; i < NUM_CSTATE_COUNTERS; ++i) {
8456 struct cstate_counter_arch_info *const cai = &ccstate_counter_arch_infos[i];
8457
8458 if (platform->has_msr_knl_core_c6_residency && cai->msr == MSR_CORE_C6_RESIDENCY)
8459 cai->msr = MSR_KNL_CORE_C6_RESIDENCY;
8460
8461 if (!platform->has_msr_core_c1_res && cai->msr == MSR_CORE_C1_RES)
8462 cai->msr = 0;
8463
8464 if (platform->has_msr_atom_pkg_c6_residency && cai->msr == MSR_PKG_C6_RESIDENCY)
8465 cai->msr = MSR_ATOM_PKG_C6_RESIDENCY;
8466 }
8467
8468 for (int i = 0; i < NUM_MSR_COUNTERS; ++i) {
8469 msr_counter_arch_infos[i].present = false;
8470 msr_counter_arch_infos[i].needed = false;
8471 }
8472 }
8473
/* Top-level feature probe: run every subsystem probe in order. */
void probe_pm_features(void)
{
	probe_pstates();

	probe_cstates();

	probe_lpi();

	probe_intel_uncore_frequency();

	probe_graphics();

	probe_rapl();

	probe_thermal();

	/* assumes has_nhm_msrs implies the SMI count MSR exists — TODO confirm */
	if (platform->has_nhm_msrs && !no_msr)
		BIC_PRESENT(BIC_SMI);

	if (!quiet)
		decode_misc_feature_control();
}
8496
8497 /*
8498 * in /dev/cpu/ return success for names that are numbers
8499 * ie. filter out ".", "..", "microcode".
8500 */
/*
 * scandir() filter for /dev/cpu/: accept entries whose name starts
 * with a digit (i.e. keep "0".."N", drop ".", "..", "microcode").
 */
int dir_filter(const struct dirent *dirp)
{
	/* cast: passing a negative plain char to isdigit() is undefined behavior */
	return isdigit((unsigned char)dirp->d_name[0]) ? 1 : 0;
}
8508
/* sysfs file listing all possible CPUs, e.g. "0-63" */
char *possible_file = "/sys/devices/system/cpu/possible";
/* raw contents of possible_file, filled and parsed by initialize_cpu_possible_set() */
char possible_buf[1024];
8511
initialize_cpu_possible_set(void)8512 int initialize_cpu_possible_set(void)
8513 {
8514 FILE *fp;
8515
8516 fp = fopen(possible_file, "r");
8517 if (!fp) {
8518 warn("open %s", possible_file);
8519 return -1;
8520 }
8521 if (fread(possible_buf, sizeof(char), 1024, fp) == 0) {
8522 warn("read %s", possible_file);
8523 goto err;
8524 }
8525 if (parse_cpu_str(possible_buf, cpu_possible_set, cpu_possible_setsize)) {
8526 warnx("%s: cpu str malformat %s\n", possible_file, cpu_effective_str);
8527 goto err;
8528 }
8529 return 0;
8530
8531 err:
8532 fclose(fp);
8533 return -1;
8534 }
8535
/*
 * Discover system topology: build the cpus[] table, the present /
 * possible / effective / allowed cpu sets, and the package / die /
 * node / core / thread counts in topo.
 *
 * @startup: true on first invocation; affects effective-set handling
 *           and makes a missing cpu_subset CPU fatal instead of a warning.
 */
void topology_probe(bool startup)
{
	int i;
	int max_core_id = 0;
	int max_package_id = 0;
	int max_siblings = 0;

	/* Initialize num_cpus, max_cpu_num */
	set_max_cpu_num();
	topo.num_cpus = 0;
	for_all_proc_cpus(count_cpus);
	if (!summary_only)
		BIC_PRESENT(BIC_CPU);

	if (debug > 1)
		fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);

	cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology));
	if (cpus == NULL)
		err(1, "calloc cpus");

	/*
	 * Allocate and initialize cpu_present_set
	 */
	cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
	if (cpu_present_set == NULL)
		err(3, "CPU_ALLOC");
	cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
	for_all_proc_cpus(mark_cpu_present);

	/*
	 * Allocate and initialize cpu_possible_set
	 */
	cpu_possible_set = CPU_ALLOC((topo.max_cpu_num + 1));
	if (cpu_possible_set == NULL)
		err(3, "CPU_ALLOC");
	cpu_possible_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(cpu_possible_setsize, cpu_possible_set);
	initialize_cpu_possible_set();

	/*
	 * Allocate and initialize cpu_effective_set
	 */
	cpu_effective_set = CPU_ALLOC((topo.max_cpu_num + 1));
	if (cpu_effective_set == NULL)
		err(3, "CPU_ALLOC");
	cpu_effective_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(cpu_effective_setsize, cpu_effective_set);
	update_effective_set(startup);

	/*
	 * Allocate and initialize cpu_allowed_set
	 */
	cpu_allowed_set = CPU_ALLOC((topo.max_cpu_num + 1));
	if (cpu_allowed_set == NULL)
		err(3, "CPU_ALLOC");
	cpu_allowed_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(cpu_allowed_setsize, cpu_allowed_set);

	/*
	 * Validate and update cpu_allowed_set.
	 *
	 * Make sure all cpus in cpu_subset are also in cpu_present_set during startup.
	 * Give a warning when cpus in cpu_subset become unavailable at runtime.
	 * Give a warning when cpus are not effective because of cgroup setting.
	 *
	 * cpu_allowed_set is the intersection of cpu_present_set/cpu_effective_set/cpu_subset.
	 */
	for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
		if (cpu_subset && !CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
			continue;

		if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set)) {
			if (cpu_subset) {
				/* cpus in cpu_subset must be in cpu_present_set during startup */
				if (startup)
					err(1, "cpu%d not present", i);
				else
					fprintf(stderr, "cpu%d not present\n", i);
			}
			continue;
		}

		/* an empty effective set means "no cgroup restriction" */
		if (CPU_COUNT_S(cpu_effective_setsize, cpu_effective_set)) {
			if (!CPU_ISSET_S(i, cpu_effective_setsize, cpu_effective_set)) {
				fprintf(stderr, "cpu%d not effective\n", i);
				continue;
			}
		}

		CPU_SET_S(i, cpu_allowed_setsize, cpu_allowed_set);
	}

	if (!CPU_COUNT_S(cpu_allowed_setsize, cpu_allowed_set))
		err(-ENODEV, "No valid cpus found");
	/* pin ourselves to the allowed cpus */
	sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set);

	/*
	 * Allocate and initialize cpu_affinity_set
	 */
	cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
	if (cpu_affinity_set == NULL)
		err(3, "CPU_ALLOC");
	cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);

	for_all_proc_cpus(init_thread_id);

	for_all_proc_cpus(set_cpu_hybrid_type);

	/*
	 * For online cpus
	 * find max_core_id, max_package_id
	 */
	for (i = 0; i <= topo.max_cpu_num; ++i) {
		int siblings;

		if (cpu_is_not_present(i)) {
			if (debug > 1)
				fprintf(outf, "cpu%d NOT PRESENT\n", i);
			continue;
		}

		cpus[i].logical_cpu_id = i;

		/* get package information */
		cpus[i].physical_package_id = get_physical_package_id(i);
		if (cpus[i].physical_package_id > max_package_id)
			max_package_id = cpus[i].physical_package_id;

		/* get die information */
		cpus[i].die_id = get_die_id(i);
		if (cpus[i].die_id > topo.max_die_id)
			topo.max_die_id = cpus[i].die_id;

		/* get numa node information */
		cpus[i].physical_node_id = get_physical_node_id(&cpus[i]);
		if (cpus[i].physical_node_id > topo.max_node_num)
			topo.max_node_num = cpus[i].physical_node_id;

		/* get core information */
		cpus[i].physical_core_id = get_core_id(i);
		if (cpus[i].physical_core_id > max_core_id)
			max_core_id = cpus[i].physical_core_id;

		/* get thread information */
		siblings = get_thread_siblings(&cpus[i]);
		if (siblings > max_siblings)
			max_siblings = siblings;
		/* count each core once, via its thread 0 */
		if (cpus[i].thread_id == 0)
			topo.num_cores++;
	}
	topo.max_core_id = max_core_id;
	topo.max_package_id = max_package_id;

	topo.cores_per_node = max_core_id + 1;
	if (debug > 1)
		fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.cores_per_node);
	if (!summary_only)
		BIC_PRESENT(BIC_Core);

	topo.num_die = topo.max_die_id + 1;
	if (debug > 1)
		fprintf(outf, "max_die_id %d, sizing for %d die\n", topo.max_die_id, topo.num_die);
	if (!summary_only && topo.num_die > 1)
		BIC_PRESENT(BIC_Die);

	topo.num_packages = max_package_id + 1;
	if (debug > 1)
		fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages);
	if (!summary_only && topo.num_packages > 1)
		BIC_PRESENT(BIC_Package);

	set_node_data();
	if (debug > 1)
		fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg);
	if (!summary_only && topo.nodes_per_pkg > 1)
		BIC_PRESENT(BIC_Node);

	topo.threads_per_core = max_siblings;
	if (debug > 1)
		fprintf(outf, "max_siblings %d\n", max_siblings);

	if (debug < 1)
		return;

	/* debug dump of the per-cpu topology table */
	for (i = 0; i <= topo.max_cpu_num; ++i) {
		if (cpu_is_not_present(i))
			continue;
		fprintf(outf,
			"cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n",
			i, cpus[i].physical_package_id, cpus[i].die_id,
			cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, cpus[i].thread_id);
	}

}
8733
allocate_counters(struct thread_data ** t,struct core_data ** c,struct pkg_data ** p)8734 void allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
8735 {
8736 int i;
8737 int num_cores = topo.cores_per_node * topo.nodes_per_pkg * topo.num_packages;
8738 int num_threads = topo.threads_per_core * num_cores;
8739
8740 *t = calloc(num_threads, sizeof(struct thread_data));
8741 if (*t == NULL)
8742 goto error;
8743
8744 for (i = 0; i < num_threads; i++)
8745 (*t)[i].cpu_id = -1;
8746
8747 *c = calloc(num_cores, sizeof(struct core_data));
8748 if (*c == NULL)
8749 goto error;
8750
8751 for (i = 0; i < num_cores; i++) {
8752 (*c)[i].core_id = -1;
8753 (*c)[i].base_cpu = -1;
8754 }
8755
8756 *p = calloc(topo.num_packages, sizeof(struct pkg_data));
8757 if (*p == NULL)
8758 goto error;
8759
8760 for (i = 0; i < topo.num_packages; i++) {
8761 (*p)[i].package_id = i;
8762 (*p)[i].base_cpu = -1;
8763 }
8764
8765 return;
8766 error:
8767 err(1, "calloc counters");
8768 }
8769
8770 /*
8771 * init_counter()
8772 *
8773 * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
8774 */
void init_counter(struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base, int cpu_id)
{
	int pkg_id = cpus[cpu_id].physical_package_id;
	int node_id = cpus[cpu_id].logical_node_id;
	int core_id = cpus[cpu_id].physical_core_id;
	int thread_id = cpus[cpu_id].thread_id;
	struct thread_data *t;
	struct core_data *c;
	struct pkg_data *p;

	/* Workaround for systems where physical_node_id==-1
	 * and logical_node_id==(-1 - topo.num_cpus)
	 */
	if (node_id < 0)
		node_id = 0;

	/* locate this cpu's slot in each counter array */
	t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
	c = GET_CORE(core_base, core_id, node_id, pkg_id);
	p = GET_PKG(pkg_base, pkg_id);

	t->cpu_id = cpu_id;
	/* first allowed cpu seen becomes the core's / package's base_cpu */
	if (!cpu_is_not_allowed(cpu_id)) {
		if (c->base_cpu < 0)
			c->base_cpu = t->cpu_id;
		if (p->base_cpu < 0)
			p->base_cpu = t->cpu_id;
	}

	c->core_id = core_id;
	p->package_id = pkg_id;
}
8806
/* for_all_proc_cpus() callback: set up both the even and odd counter
 * slots for cpu_id. */
int initialize_counters(int cpu_id)
{
	init_counter(EVEN_COUNTERS, cpu_id);
	init_counter(ODD_COUNTERS, cpu_id);
	return 0;
}
8813
allocate_output_buffer()8814 void allocate_output_buffer()
8815 {
8816 output_buffer = calloc(1, (1 + topo.num_cpus) * 2048);
8817 outp = output_buffer;
8818 if (outp == NULL)
8819 err(-1, "calloc output buffer");
8820 }
8821
allocate_fd_percpu(void)8822 void allocate_fd_percpu(void)
8823 {
8824 fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
8825 if (fd_percpu == NULL)
8826 err(-1, "calloc fd_percpu");
8827 }
8828
allocate_irq_buffers(void)8829 void allocate_irq_buffers(void)
8830 {
8831 irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
8832 if (irq_column_2_cpu == NULL)
8833 err(-1, "calloc %d", topo.num_cpus);
8834
8835 irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
8836 if (irqs_per_cpu == NULL)
8837 err(-1, "calloc %d IRQ", topo.max_cpu_num + 1);
8838
8839 nmi_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
8840 if (nmi_per_cpu == NULL)
8841 err(-1, "calloc %d NMI", topo.max_cpu_num + 1);
8842 }
8843
update_topo(struct thread_data * t,struct core_data * c,struct pkg_data * p)8844 int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p)
8845 {
8846 topo.allowed_cpus++;
8847 if ((int)t->cpu_id == c->base_cpu)
8848 topo.allowed_cores++;
8849 if ((int)t->cpu_id == p->base_cpu)
8850 topo.allowed_packages++;
8851
8852 return 0;
8853 }
8854
topology_update(void)8855 void topology_update(void)
8856 {
8857 topo.allowed_cpus = 0;
8858 topo.allowed_cores = 0;
8859 topo.allowed_packages = 0;
8860 for_all_cpus(update_topo, ODD_COUNTERS);
8861 }
8862
/* Probe topology, then allocate and initialize every per-run buffer.
 * topology_probe() must run first: all later allocations size off topo.*. */
void setup_all_buffers(bool startup)
{
	topology_probe(startup);
	allocate_irq_buffers();
	allocate_fd_percpu();
	allocate_counters(&thread_even, &core_even, &package_even);
	allocate_counters(&thread_odd, &core_odd, &package_odd);
	allocate_output_buffer();
	for_all_proc_cpus(initialize_counters);
	topology_update();
}
8874
set_base_cpu(void)8875 void set_base_cpu(void)
8876 {
8877 int i;
8878
8879 for (i = 0; i < topo.max_cpu_num + 1; ++i) {
8880 if (cpu_is_not_allowed(i))
8881 continue;
8882 base_cpu = i;
8883 if (debug > 1)
8884 fprintf(outf, "base_cpu = %d\n", base_cpu);
8885 return;
8886 }
8887 err(-ENODEV, "No valid cpus found");
8888 }
8889
has_added_counters(void)8890 bool has_added_counters(void)
8891 {
8892 /*
8893 * It only makes sense to call this after the command line is parsed,
8894 * otherwise sys structure is not populated.
8895 */
8896
8897 return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters;
8898 }
8899
/* Verify MSR device access; disable MSR-backed columns when unavailable. */
void check_msr_access(void)
{
	check_dev_msr();
	check_msr_permission();

	/* no_msr reflects --no-msr and, presumably, failures detected by the
	 * checks above — verify against those helpers */
	if (no_msr)
		bic_disable_msr_access();
}
8908
check_perf_access(void)8909 void check_perf_access(void)
8910 {
8911 if (no_perf || !BIC_IS_ENABLED(BIC_IPC) || !has_instr_count_access())
8912 bic_enabled &= ~BIC_IPC;
8913 }
8914
bool perf_has_hybrid_devices(void)
{
	/*
	 * 0: unknown
	 * 1: has separate perf device for p and e core
	 * -1: doesn't have separate perf device for p and e core
	 */
	static int cached;

	if (cached == 0) {
		const bool has_core = access("/sys/bus/event_source/devices/cpu_core", F_OK) == 0;
		const bool has_atom = access("/sys/bus/event_source/devices/cpu_atom", F_OK) == 0;

		cached = (has_core && has_atom) ? 1 : -1;
	}

	return cached > 0;
}
8943
added_perf_counters_init_(struct perf_counter_info * pinfo)8944 int added_perf_counters_init_(struct perf_counter_info *pinfo)
8945 {
8946 size_t num_domains = 0;
8947 unsigned int next_domain;
8948 bool *domain_visited;
8949 unsigned int perf_type, perf_config;
8950 double perf_scale;
8951 int fd_perf;
8952
8953 if (!pinfo)
8954 return 0;
8955
8956 const size_t max_num_domains = MAX(topo.max_cpu_num + 1, MAX(topo.max_core_id + 1, topo.max_package_id + 1));
8957
8958 domain_visited = calloc(max_num_domains, sizeof(*domain_visited));
8959
8960 while (pinfo) {
8961 switch (pinfo->scope) {
8962 case SCOPE_CPU:
8963 num_domains = topo.max_cpu_num + 1;
8964 break;
8965
8966 case SCOPE_CORE:
8967 num_domains = topo.max_core_id + 1;
8968 break;
8969
8970 case SCOPE_PACKAGE:
8971 num_domains = topo.max_package_id + 1;
8972 break;
8973 }
8974
8975 /* Allocate buffer for file descriptor for each domain. */
8976 pinfo->fd_perf_per_domain = calloc(num_domains, sizeof(*pinfo->fd_perf_per_domain));
8977 if (!pinfo->fd_perf_per_domain)
8978 errx(1, "%s: alloc %s", __func__, "fd_perf_per_domain");
8979
8980 for (size_t i = 0; i < num_domains; ++i)
8981 pinfo->fd_perf_per_domain[i] = -1;
8982
8983 pinfo->num_domains = num_domains;
8984 pinfo->scale = 1.0;
8985
8986 memset(domain_visited, 0, max_num_domains * sizeof(*domain_visited));
8987
8988 for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) {
8989
8990 next_domain = cpu_to_domain(pinfo, cpu);
8991
8992 assert(next_domain < num_domains);
8993
8994 if (cpu_is_not_allowed(cpu))
8995 continue;
8996
8997 if (domain_visited[next_domain])
8998 continue;
8999
9000 /*
9001 * Intel hybrid platforms expose different perf devices for P and E cores.
9002 * Instead of one, "/sys/bus/event_source/devices/cpu" device, there are
9003 * "/sys/bus/event_source/devices/{cpu_core,cpu_atom}".
9004 *
9005 * This makes it more complicated to the user, because most of the counters
9006 * are available on both and have to be handled manually, otherwise.
9007 *
9008 * Code below, allow user to use the old "cpu" name, which is translated accordingly.
9009 */
9010 const char *perf_device = pinfo->device;
9011
9012 if (strcmp(perf_device, "cpu") == 0 && perf_has_hybrid_devices()) {
9013 switch (cpus[cpu].type) {
9014 case INTEL_PCORE_TYPE:
9015 perf_device = "cpu_core";
9016 break;
9017
9018 case INTEL_ECORE_TYPE:
9019 perf_device = "cpu_atom";
9020 break;
9021
9022 default: /* Don't change, we will probably fail and report a problem soon. */
9023 break;
9024 }
9025 }
9026
9027 perf_type = read_perf_type(perf_device);
9028 if (perf_type == (unsigned int)-1) {
9029 warnx("%s: perf/%s/%s: failed to read %s",
9030 __func__, perf_device, pinfo->event, "type");
9031 continue;
9032 }
9033
9034 perf_config = read_perf_config(perf_device, pinfo->event);
9035 if (perf_config == (unsigned int)-1) {
9036 warnx("%s: perf/%s/%s: failed to read %s",
9037 __func__, perf_device, pinfo->event, "config");
9038 continue;
9039 }
9040
9041 /* Scale is not required, some counters just don't have it. */
9042 perf_scale = read_perf_scale(perf_device, pinfo->event);
9043 if (perf_scale == 0.0)
9044 perf_scale = 1.0;
9045
9046 fd_perf = open_perf_counter(cpu, perf_type, perf_config, -1, 0);
9047 if (fd_perf == -1) {
9048 warnx("%s: perf/%s/%s: failed to open counter on cpu%d",
9049 __func__, perf_device, pinfo->event, cpu);
9050 continue;
9051 }
9052
9053 domain_visited[next_domain] = 1;
9054 pinfo->fd_perf_per_domain[next_domain] = fd_perf;
9055 pinfo->scale = perf_scale;
9056
9057 if (debug)
9058 fprintf(stderr, "Add perf/%s/%s cpu%d: %d\n",
9059 perf_device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]);
9060 }
9061
9062 pinfo = pinfo->next;
9063 }
9064
9065 free(domain_visited);
9066
9067 return 0;
9068 }
9069
/*
 * Open perf file descriptors for all user-added perf counters,
 * one scope list at a time.  Fatal if any list fails to initialize.
 */
void added_perf_counters_init(void)
{
	if (added_perf_counters_init_(sys.perf_tp))
		errx(1, "%s: %s", __func__, "thread");

	if (added_perf_counters_init_(sys.perf_cp))
		errx(1, "%s: %s", __func__, "core");

	if (added_perf_counters_init_(sys.perf_pp))
		errx(1, "%s: %s", __func__, "package");
}
9081
/*
 * Read one value from an info file (e.g. "guid", "size") located inside the
 * telemetry directory referenced by fd_dir, using the given scanf format.
 * Returns 0 on success and stores the value in *output; -1 on any failure
 * (*output is left untouched).
 */
int parse_telem_info_file(int fd_dir, const char *info_filename, const char *format, unsigned long *output)
{
	FILE *file;
	int fd;
	unsigned long parsed;
	int ret = -1;

	fd = openat(fd_dir, info_filename, O_RDONLY);
	if (fd == -1)
		return -1;

	file = fdopen(fd, "r");
	if (file == NULL) {
		close(fd);
		return -1;
	}

	/* The stream now owns fd; fclose() below releases both. */
	if (fscanf(file, format, &parsed) == 1) {
		*output = parsed;
		ret = 0;
	}

	fclose(file);

	return ret;
}
9109
/*
 * Walk the sysfs telemetry directory (SYSFS_TELEM_PATH) and mmap, read-only,
 * every telemetry region whose "guid" info file matches target_guid.
 *
 * Mapped regions are collected into a list preserving sysfs iteration order
 * (important for the user-facing seq=%u parameter), and that list is
 * prepended to the global pmt_mmios list.  Returns the head of the newly
 * created sublist, or NULL if nothing was mapped.
 */
struct pmt_mmio *pmt_mmio_open(unsigned int target_guid)
{
	struct pmt_diriter_t pmt_iter;
	const struct dirent *entry;
	struct stat st;
	int fd_telem_dir, fd_pmt;
	unsigned long guid, size, offset;
	size_t mmap_size;
	void *mmio;
	struct pmt_mmio *head = NULL, *last = NULL;
	struct pmt_mmio *new_pmt = NULL;

	if (stat(SYSFS_TELEM_PATH, &st) == -1)
		return NULL;

	pmt_diriter_init(&pmt_iter);
	entry = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH);
	if (!entry) {
		pmt_diriter_remove(&pmt_iter);
		return NULL;
	}

	for ( ; entry != NULL; entry = pmt_diriter_next(&pmt_iter)) {
		if (fstatat(dirfd(pmt_iter.dir), entry->d_name, &st, 0) == -1)
			break;

		if (!S_ISDIR(st.st_mode))
			continue;

		fd_telem_dir = openat(dirfd(pmt_iter.dir), entry->d_name, O_RDONLY);
		if (fd_telem_dir == -1)
			break;

		if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) {
			close(fd_telem_dir);
			break;
		}

		if (parse_telem_info_file(fd_telem_dir, "size", "%lu", &size)) {
			close(fd_telem_dir);
			break;
		}

		/* Not the guid we are looking for; keep scanning. */
		if (guid != target_guid) {
			close(fd_telem_dir);
			continue;
		}

		if (parse_telem_info_file(fd_telem_dir, "offset", "%lu", &offset)) {
			close(fd_telem_dir);
			break;
		}

		/* Only regions whose telemetry data begins at offset 0 are handled. */
		assert(offset == 0);

		fd_pmt = openat(fd_telem_dir, "telem", O_RDONLY);
		if (fd_pmt == -1)
			goto loop_cleanup_and_break;

		mmap_size = ROUND_UP_TO_PAGE_SIZE(size);
		mmio = mmap(0, mmap_size, PROT_READ, MAP_SHARED, fd_pmt, 0);
		if (mmio != MAP_FAILED) {
			if (debug)
				fprintf(stderr, "%s: 0x%lx mmaped at: %p\n", __func__, guid, mmio);

			new_pmt = calloc(1, sizeof(*new_pmt));

			if (!new_pmt) {
				fprintf(stderr, "%s: Failed to allocate pmt_mmio\n", __func__);
				exit(1);
			}

			/*
			 * Create linked list of mmaped regions,
			 * but preserve the ordering from sysfs.
			 * Ordering is important for the user to
			 * use the seq=%u parameter when adding a counter.
			 */
			new_pmt->guid = guid;
			new_pmt->mmio_base = mmio;
			new_pmt->pmt_offset = offset;
			new_pmt->size = size;
			new_pmt->next = pmt_mmios;

			if (last)
				last->next = new_pmt;
			else
				head = new_pmt;

			last = new_pmt;
		}

		/*
		 * NOTE(review): despite its name, this label does not break out of
		 * the loop — control falls through the close() calls below and
		 * iteration continues with the next sysfs entry.
		 */
 loop_cleanup_and_break:
		close(fd_pmt);
		close(fd_telem_dir);
	}

	pmt_diriter_remove(&pmt_iter);

	/*
	 * If we found something, stick just
	 * created linked list to the front.
	 */
	if (head)
		pmt_mmios = head;

	return head;
}
9218
pmt_mmio_find(unsigned int guid)9219 struct pmt_mmio *pmt_mmio_find(unsigned int guid)
9220 {
9221 struct pmt_mmio *pmmio = pmt_mmios;
9222
9223 while (pmmio) {
9224 if (pmmio->guid == guid)
9225 return pmmio;
9226
9227 pmmio = pmmio->next;
9228 }
9229
9230 return NULL;
9231 }
9232
pmt_get_counter_pointer(struct pmt_mmio * pmmio,unsigned long counter_offset)9233 void *pmt_get_counter_pointer(struct pmt_mmio *pmmio, unsigned long counter_offset)
9234 {
9235 char *ret;
9236
9237 /* Get base of mmaped PMT file. */
9238 ret = (char *)pmmio->mmio_base;
9239
9240 /*
9241 * Apply PMT MMIO offset to obtain beginning of the mmaped telemetry data.
9242 * It's not guaranteed that the mmaped memory begins with the telemetry data
9243 * - we might have to apply the offset first.
9244 */
9245 ret += pmmio->pmt_offset;
9246
9247 /* Apply the counter offset to get the address to the mmaped counter. */
9248 ret += counter_offset;
9249
9250 return ret;
9251 }
9252
pmt_add_guid(unsigned int guid,unsigned int seq)9253 struct pmt_mmio *pmt_add_guid(unsigned int guid, unsigned int seq)
9254 {
9255 struct pmt_mmio *ret;
9256
9257 ret = pmt_mmio_find(guid);
9258 if (!ret)
9259 ret = pmt_mmio_open(guid);
9260
9261 while (ret && seq) {
9262 ret = ret->next;
9263 --seq;
9264 }
9265
9266 return ret;
9267 }
9268
/* How pmt_add_counter() reacts when the requested PMT region is unavailable. */
enum pmt_open_mode {
	PMT_OPEN_TRY,		/* Open failure is not an error. */
	PMT_OPEN_REQUIRED,	/* Open failure is a fatal error. */
};
9273
pmt_find_counter(struct pmt_counter * pcounter,const char * name)9274 struct pmt_counter *pmt_find_counter(struct pmt_counter *pcounter, const char *name)
9275 {
9276 while (pcounter) {
9277 if (strcmp(pcounter->name, name) == 0)
9278 break;
9279
9280 pcounter = pcounter->next;
9281 }
9282
9283 return pcounter;
9284 }
9285
pmt_get_scope_root(enum counter_scope scope)9286 struct pmt_counter **pmt_get_scope_root(enum counter_scope scope)
9287 {
9288 switch (scope) {
9289 case SCOPE_CPU:
9290 return &sys.pmt_tp;
9291 case SCOPE_CORE:
9292 return &sys.pmt_cp;
9293 case SCOPE_PACKAGE:
9294 return &sys.pmt_pp;
9295 }
9296
9297 __builtin_unreachable();
9298 }
9299
/*
 * Point the counter's slot for domain_id at its mmaped PMT location,
 * growing the domain array first when domain_id is beyond its current size.
 */
void pmt_counter_add_domain(struct pmt_counter *pcounter, unsigned long *pmmio, unsigned int domain_id)
{
	/* Make sure the new domain fits. */
	if (domain_id >= pcounter->num_domains)
		pmt_counter_resize(pcounter, domain_id + 1);

	assert(pcounter->domains);
	assert(domain_id < pcounter->num_domains);

	pcounter->domains[domain_id].pcounter = pmmio;
}
9311
pmt_add_counter(unsigned int guid,unsigned int seq,const char * name,enum pmt_datatype type,unsigned int lsb,unsigned int msb,unsigned int offset,enum counter_scope scope,enum counter_format format,unsigned int domain_id,enum pmt_open_mode mode)9312 int pmt_add_counter(unsigned int guid, unsigned int seq, const char *name, enum pmt_datatype type,
9313 unsigned int lsb, unsigned int msb, unsigned int offset, enum counter_scope scope,
9314 enum counter_format format, unsigned int domain_id, enum pmt_open_mode mode)
9315 {
9316 struct pmt_mmio *mmio;
9317 struct pmt_counter *pcounter;
9318 struct pmt_counter **const pmt_root = pmt_get_scope_root(scope);
9319 bool new_counter = false;
9320 int conflict = 0;
9321
9322 if (lsb > msb) {
9323 fprintf(stderr, "%s: %s: `%s` must be satisfied\n", __func__, "lsb <= msb", name);
9324 exit(1);
9325 }
9326
9327 if (msb >= 64) {
9328 fprintf(stderr, "%s: %s: `%s` must be satisfied\n", __func__, "msb < 64", name);
9329 exit(1);
9330 }
9331
9332 mmio = pmt_add_guid(guid, seq);
9333 if (!mmio) {
9334 if (mode != PMT_OPEN_TRY) {
9335 fprintf(stderr, "%s: failed to map PMT MMIO for guid %x, seq %u\n", __func__, guid, seq);
9336 exit(1);
9337 }
9338
9339 return 1;
9340 }
9341
9342 if (offset >= mmio->size) {
9343 if (mode != PMT_OPEN_TRY) {
9344 fprintf(stderr, "%s: offset %u outside of PMT MMIO size %u\n", __func__, offset, mmio->size);
9345 exit(1);
9346 }
9347
9348 return 1;
9349 }
9350
9351 pcounter = pmt_find_counter(*pmt_root, name);
9352 if (!pcounter) {
9353 pcounter = calloc(1, sizeof(*pcounter));
9354 new_counter = true;
9355 }
9356
9357 if (new_counter) {
9358 strncpy(pcounter->name, name, ARRAY_SIZE(pcounter->name) - 1);
9359 pcounter->type = type;
9360 pcounter->scope = scope;
9361 pcounter->lsb = lsb;
9362 pcounter->msb = msb;
9363 pcounter->format = format;
9364 } else {
9365 conflict += pcounter->type != type;
9366 conflict += pcounter->scope != scope;
9367 conflict += pcounter->lsb != lsb;
9368 conflict += pcounter->msb != msb;
9369 conflict += pcounter->format != format;
9370 }
9371
9372 if (conflict) {
9373 fprintf(stderr, "%s: conflicting parameters for the PMT counter with the same name %s\n",
9374 __func__, name);
9375 exit(1);
9376 }
9377
9378 pmt_counter_add_domain(pcounter, pmt_get_counter_pointer(mmio, offset), domain_id);
9379
9380 if (new_counter) {
9381 pcounter->next = *pmt_root;
9382 *pmt_root = pcounter;
9383 }
9384
9385 return 0;
9386 }
9387
/*
 * Register the built-in PMT-backed counters for enabled columns:
 * Die%c6 (one per package) and CPU%c1e (one per CPU, read from
 * per-module Clearwater Forest telemetry).
 */
void pmt_init(void)
{
	int cpu_num;
	unsigned long seq, offset, mod_num;

	if (BIC_IS_ENABLED(BIC_Diec6)) {
		pmt_add_counter(PMT_MTL_DC6_GUID, PMT_MTL_DC6_SEQ, "Die%c6", PMT_TYPE_XTAL_TIME,
				PMT_COUNTER_MTL_DC6_LSB, PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET,
				SCOPE_PACKAGE, FORMAT_DELTA, 0, PMT_OPEN_TRY);
	}

	if (BIC_IS_ENABLED(BIC_CPU_c1e)) {
		seq = 0;
		offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE;
		mod_num = 0;	/* Relative module number for current PMT file. */

		/*
		 * Open the counter for each CPU.
		 *
		 * NOTE(review): other per-CPU loops in this file iterate up to and
		 * including topo.max_cpu_num ("< topo.max_cpu_num + 1"); this one
		 * stops one short — confirm the last CPU is intentionally excluded.
		 */
		for (cpu_num = 0; cpu_num < topo.max_cpu_num;) {

			if (cpu_is_not_allowed(cpu_num))
				goto next_loop_iter;

			/*
			 * Set the scope to CPU, even though CWF report the counter per module.
			 * CPUs inside the same module will read from the same location, instead of reporting zeros.
			 *
			 * CWF with newer firmware might require a PMT_TYPE_XTAL_TIME intead of PMT_TYPE_TCORE_CLOCK.
			 */
			pmt_add_counter(PMT_CWF_MC1E_GUID, seq, "CPU%c1e", PMT_TYPE_TCORE_CLOCK,
					PMT_COUNTER_CWF_MC1E_LSB, PMT_COUNTER_CWF_MC1E_MSB, offset, SCOPE_CPU,
					FORMAT_DELTA, cpu_num, PMT_OPEN_TRY);

			/*
			 * Rather complex logic for each time we go to the next loop iteration,
			 * so keep it as a label.
			 */
next_loop_iter:
			/*
			 * Advance the cpu number and check if we should also advance offset to
			 * the next counter inside the PMT file.
			 *
			 * On Clearwater Forest platform, the counter is reported per module,
			 * so open the same counter for all of the CPUs inside the module.
			 * That way, reported table show the correct value for all of the CPUs inside the module,
			 * instead of zeros.
			 */
			++cpu_num;
			if (cpu_num % PMT_COUNTER_CWF_CPUS_PER_MODULE == 0) {
				offset += PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT;
				++mod_num;
			}

			/*
			 * There are PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE in each PMT file.
			 *
			 * If that number is reached, seq must be incremented to advance to the next file in a sequence.
			 * Offset inside that file and a module counter has to be reset.
			 */
			if (mod_num == PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE) {
				++seq;
				offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE;
				mod_num = 0;
			}
		}
	}
}
9454
/*
 * One-time startup initialization: topology and buffers, access checks,
 * platform probing, and counter setup — in dependency order.
 */
void turbostat_init()
{
	setup_all_buffers(true);
	set_base_cpu();
	check_msr_access();
	check_perf_access();
	process_cpuid();
	counter_info_init();
	probe_pm_features();
	msr_perf_init();
	linux_perf_init();
	rapl_perf_init();
	cstate_perf_init();
	added_perf_counters_init();
	pmt_init();

	/* Populate each CPU's core type in both counter sets (used for hybrid perf device selection). */
	for_all_cpus(get_cpu_type, ODD_COUNTERS);
	for_all_cpus(get_cpu_type, EVEN_COUNTERS);

	/* IPC is only usable with APERF access and an open instruction-count fd. */
	if (BIC_IS_ENABLED(BIC_IPC) && has_aperf_access && get_instr_count_fd(base_cpu) != -1)
		BIC_PRESENT(BIC_IPC);

	/*
	 * If TSC tweak is needed, but couldn't get it,
	 * disable more BICs, since it can't be reported accurately.
	 */
	if (platform->enable_tsc_tweak && !has_base_hz) {
		bic_enabled &= ~BIC_Busy;
		bic_enabled &= ~BIC_Bzy_MHz;
	}
}
9486
affinitize_child(void)9487 void affinitize_child(void)
9488 {
9489 /* Prefer cpu_possible_set, if available */
9490 if (sched_setaffinity(0, cpu_possible_setsize, cpu_possible_set)) {
9491 warn("sched_setaffinity cpu_possible_set");
9492
9493 /* Otherwise, allow child to run on same cpu set as turbostat */
9494 if (sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set))
9495 warn("sched_setaffinity cpu_allowed_set");
9496 }
9497 }
9498
/*
 * Run "argv" as a child process, sampling all counters once before the fork
 * and once after the child exits, then print the deltas over the child's
 * runtime.  Returns the child's exit status.
 */
int fork_it(char **argv)
{
	pid_t child_pid;
	int status;

	snapshot_proc_sysfs_files();
	status = for_all_cpus(get_counters, EVEN_COUNTERS);
	first_counter_read = 0;
	if (status)
		exit(status);
	gettimeofday(&tv_even, (struct timezone *)NULL);

	child_pid = fork();
	if (!child_pid) {
		/* child */
		affinitize_child();
		execvp(argv[0], argv);
		err(errno, "exec %s", argv[0]);
	} else {

		/* parent */
		if (child_pid == -1)
			err(1, "fork");

		/* Ignore interrupt signals in the parent while waiting for the child. */
		signal(SIGINT, SIG_IGN);
		signal(SIGQUIT, SIG_IGN);
		/*
		 * NOTE(review): on waitpid() failure, "status" still holds the
		 * (zero) result of the earlier counter read, so err() would exit
		 * with code 0 — confirm the intended exit code here.
		 */
		if (waitpid(child_pid, &status, 0) == -1)
			err(status, "waitpid");

		if (WIFEXITED(status))
			status = WEXITSTATUS(status);
	}
	/*
	 * n.b. fork_it() does not check for errors from for_all_cpus()
	 * because re-starting is problematic when forking
	 */
	snapshot_proc_sysfs_files();
	for_all_cpus(get_counters, ODD_COUNTERS);
	gettimeofday(&tv_odd, (struct timezone *)NULL);
	timersub(&tv_odd, &tv_even, &tv_delta);
	if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
		fprintf(outf, "%s: Counter reset detected\n", progname);

	compute_average(EVEN_COUNTERS);
	format_all_counters(EVEN_COUNTERS);

	fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0);

	flush_output_stderr();

	return status;
}
9551
get_and_dump_counters(void)9552 int get_and_dump_counters(void)
9553 {
9554 int status;
9555
9556 snapshot_proc_sysfs_files();
9557 status = for_all_cpus(get_counters, ODD_COUNTERS);
9558 if (status)
9559 return status;
9560
9561 status = for_all_cpus(dump_counters, ODD_COUNTERS);
9562 if (status)
9563 return status;
9564
9565 flush_output_stdout();
9566
9567 return status;
9568 }
9569
print_version()9570 void print_version()
9571 {
9572 fprintf(outf, "turbostat version 2025.02.02 - Len Brown <[email protected]>\n");
9573 }
9574
9575 #define COMMAND_LINE_SIZE 2048
9576
print_bootcmd(void)9577 void print_bootcmd(void)
9578 {
9579 char bootcmd[COMMAND_LINE_SIZE];
9580 FILE *fp;
9581 int ret;
9582
9583 memset(bootcmd, 0, COMMAND_LINE_SIZE);
9584 fp = fopen("/proc/cmdline", "r");
9585 if (!fp)
9586 return;
9587
9588 ret = fread(bootcmd, sizeof(char), COMMAND_LINE_SIZE - 1, fp);
9589 if (ret) {
9590 bootcmd[ret] = '\0';
9591 /* the last character is already '\n' */
9592 fprintf(outf, "Kernel command line: %s", bootcmd);
9593 }
9594
9595 fclose(fp);
9596 }
9597
find_msrp_by_name(struct msr_counter * head,char * name)9598 struct msr_counter *find_msrp_by_name(struct msr_counter *head, char *name)
9599 {
9600 struct msr_counter *mp;
9601
9602 for (mp = head; mp; mp = mp->next) {
9603 if (debug)
9604 fprintf(stderr, "%s: %s %s\n", __func__, name, mp->name);
9605 if (!strncmp(name, mp->name, strlen(mp->name)))
9606 return mp;
9607 }
9608 return NULL;
9609 }
9610
/*
 * Register a user-added counter, backed by an MSR (msr_num != 0) and/or a
 * sysfs path.  A counter whose name already matches one on the per-scope
 * list is reused and only gains the new sysfs path (if any); otherwise a
 * new msr_counter is allocated and pushed onto that list.
 *
 * Returns 0 on success, -1 when the counter is rejected (per-scope limit
 * reached or unknown scope).
 */
int add_counter(unsigned int msr_num, char *path, char *name,
		unsigned int width, enum counter_scope scope,
		enum counter_type type, enum counter_format format, int flags, int id)
{
	struct msr_counter *msrp;

	if (no_msr && msr_num)
		errx(1, "Requested MSR counter 0x%x, but in --no-msr mode", msr_num);

	if (debug)
		fprintf(stderr, "%s(msr%d, %s, %s, width%d, scope%d, type%d, format%d, flags%x, id%d)\n",
			__func__, msr_num, path, name, width, scope, type, format, flags, id);

	/*
	 * Per-scope duplicate lookup and limit check.
	 * NOTE(review): each added_*_counters count is post-incremented even
	 * when the limit check then rejects the counter — confirm intended.
	 */
	switch (scope) {

	case SCOPE_CPU:
		msrp = find_msrp_by_name(sys.tp, name);
		if (msrp) {
			if (debug)
				fprintf(stderr, "%s: %s FOUND\n", __func__, name);
			break;
		}
		if (sys.added_thread_counters++ >= MAX_ADDED_THREAD_COUNTERS) {
			warnx("ignoring thread counter %s", name);
			return -1;
		}
		break;
	case SCOPE_CORE:
		msrp = find_msrp_by_name(sys.cp, name);
		if (msrp) {
			if (debug)
				fprintf(stderr, "%s: %s FOUND\n", __func__, name);
			break;
		}
		if (sys.added_core_counters++ >= MAX_ADDED_CORE_COUNTERS) {
			warnx("ignoring core counter %s", name);
			return -1;
		}
		break;
	case SCOPE_PACKAGE:
		msrp = find_msrp_by_name(sys.pp, name);
		if (msrp) {
			if (debug)
				fprintf(stderr, "%s: %s FOUND\n", __func__, name);
			break;
		}
		if (sys.added_package_counters++ >= MAX_ADDED_PACKAGE_COUNTERS) {
			warnx("ignoring package counter %s", name);
			return -1;
		}
		break;
	default:
		warnx("ignoring counter %s with unknown scope", name);
		return -1;
	}

	/* Name not seen before: allocate a new counter and link it per scope. */
	if (msrp == NULL) {
		msrp = calloc(1, sizeof(struct msr_counter));
		if (msrp == NULL)
			err(-1, "calloc msr_counter");

		msrp->msr_num = msr_num;
		strncpy(msrp->name, name, NAME_BYTES - 1);
		msrp->width = width;
		msrp->type = type;
		msrp->format = format;
		msrp->flags = flags;

		switch (scope) {
		case SCOPE_CPU:
			msrp->next = sys.tp;
			sys.tp = msrp;
			break;
		case SCOPE_CORE:
			msrp->next = sys.cp;
			sys.cp = msrp;
			break;
		case SCOPE_PACKAGE:
			msrp->next = sys.pp;
			sys.pp = msrp;
			break;
		}
	}

	/* Attach the sysfs path (if any) to the counter's path list. */
	if (path) {
		struct sysfs_path *sp;

		sp = calloc(1, sizeof(struct sysfs_path));
		if (sp == NULL) {
			perror("calloc");
			exit(1);
		}
		strncpy(sp->path, path, PATH_BYTES - 1);
		sp->id = id;
		sp->next = msrp->sp;
		msrp->sp = sp;
	}

	return 0;
}
9711
9712 /*
9713 * Initialize the fields used for identifying and opening the counter.
9714 *
9715 * Defer the initialization of any runtime buffers for actually reading
9716 * the counters for when we initialize all perf counters, so we can later
9717 * easily call re_initialize().
9718 */
make_perf_counter_info(const char * perf_device,const char * perf_event,const char * name,unsigned int width,enum counter_scope scope,enum counter_type type,enum counter_format format)9719 struct perf_counter_info *make_perf_counter_info(const char *perf_device,
9720 const char *perf_event,
9721 const char *name,
9722 unsigned int width,
9723 enum counter_scope scope,
9724 enum counter_type type, enum counter_format format)
9725 {
9726 struct perf_counter_info *pinfo;
9727
9728 pinfo = calloc(1, sizeof(*pinfo));
9729 if (!pinfo)
9730 errx(1, "%s: Failed to allocate %s/%s\n", __func__, perf_device, perf_event);
9731
9732 strncpy(pinfo->device, perf_device, ARRAY_SIZE(pinfo->device) - 1);
9733 strncpy(pinfo->event, perf_event, ARRAY_SIZE(pinfo->event) - 1);
9734
9735 strncpy(pinfo->name, name, ARRAY_SIZE(pinfo->name) - 1);
9736 pinfo->width = width;
9737 pinfo->scope = scope;
9738 pinfo->type = type;
9739 pinfo->format = format;
9740
9741 return pinfo;
9742 }
9743
/*
 * Register a user-added perf counter (perf/<device>/<event>) on the list
 * matching its scope.  Returns 0 on success, -1 when the per-scope limit
 * is reached.
 */
int add_perf_counter(const char *perf_device, const char *perf_event, const char *name_buffer, unsigned int width,
		     enum counter_scope scope, enum counter_type type, enum counter_format format)
{
	struct perf_counter_info *pinfo;

	/* Enforce the per-scope counter limits before allocating anything. */
	switch (scope) {
	case SCOPE_CPU:
		if (sys.added_thread_perf_counters >= MAX_ADDED_THREAD_COUNTERS) {
			warnx("ignoring thread counter perf/%s/%s", perf_device, perf_event);
			return -1;
		}
		break;

	case SCOPE_CORE:
		if (sys.added_core_perf_counters >= MAX_ADDED_CORE_COUNTERS) {
			warnx("ignoring core counter perf/%s/%s", perf_device, perf_event);
			return -1;
		}
		break;

	case SCOPE_PACKAGE:
		if (sys.added_package_perf_counters >= MAX_ADDED_PACKAGE_COUNTERS) {
			warnx("ignoring package counter perf/%s/%s", perf_device, perf_event);
			return -1;
		}
		break;
	}

	pinfo = make_perf_counter_info(perf_device, perf_event, name_buffer, width, scope, type, format);

	if (!pinfo)
		return -1;

	/* Push onto the per-scope list and bump the matching count. */
	switch (scope) {
	case SCOPE_CPU:
		pinfo->next = sys.perf_tp;
		sys.perf_tp = pinfo;
		++sys.added_thread_perf_counters;
		break;

	case SCOPE_CORE:
		pinfo->next = sys.perf_cp;
		sys.perf_cp = pinfo;
		++sys.added_core_perf_counters;
		break;

	case SCOPE_PACKAGE:
		pinfo->next = sys.perf_pp;
		sys.perf_pp = pinfo;
		++sys.added_package_perf_counters;
		break;
	}

	// FIXME: we might not have debug here yet
	if (debug)
		fprintf(stderr, "%s: %s/%s, name: %s, scope%d\n",
			__func__, pinfo->device, pinfo->event, pinfo->name, pinfo->scope);

	return 0;
}
9804
/*
 * Parse one --add argument describing an MSR, sysfs-path, or perf counter,
 * e.g. "msr0x10,u64,cpu,delta,MY_TSC".  Tokens are comma-separated and may
 * appear in any order; an otherwise-unmatched token becomes the column name.
 * On success, registers the counter; on invalid input prints help and exits.
 */
void parse_add_command_msr(char *add_command)
{
	int msr_num = 0;
	char *path = NULL;
	char perf_device[PERF_DEV_NAME_BYTES] = "";
	char perf_event[PERF_EVT_NAME_BYTES] = "";
	char name_buffer[PERF_NAME_BYTES] = "";
	int width = 64;
	int fail = 0;
	enum counter_scope scope = SCOPE_CPU;
	enum counter_type type = COUNTER_CYCLES;
	enum counter_format format = FORMAT_DELTA;

	/* One token per iteration; "next:" advances past the next comma. */
	while (add_command) {

		if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
			goto next;

		if (sscanf(add_command, "msr%d", &msr_num) == 1)
			goto next;

		BUILD_BUG_ON(ARRAY_SIZE(perf_device) <= 31);
		BUILD_BUG_ON(ARRAY_SIZE(perf_event) <= 31);
		if (sscanf(add_command, "perf/%31[^/]/%31[^,]", &perf_device[0], &perf_event[0]) == 2)
			goto next;

		if (*add_command == '/') {
			path = add_command;
			goto next;
		}

		if (sscanf(add_command, "u%d", &width) == 1) {
			if ((width == 32) || (width == 64))
				goto next;
			width = 64;
		}
		if (!strncmp(add_command, "cpu", strlen("cpu"))) {
			scope = SCOPE_CPU;
			goto next;
		}
		if (!strncmp(add_command, "core", strlen("core"))) {
			scope = SCOPE_CORE;
			goto next;
		}
		if (!strncmp(add_command, "package", strlen("package"))) {
			scope = SCOPE_PACKAGE;
			goto next;
		}
		if (!strncmp(add_command, "cycles", strlen("cycles"))) {
			type = COUNTER_CYCLES;
			goto next;
		}
		if (!strncmp(add_command, "seconds", strlen("seconds"))) {
			type = COUNTER_SECONDS;
			goto next;
		}
		if (!strncmp(add_command, "usec", strlen("usec"))) {
			type = COUNTER_USEC;
			goto next;
		}
		if (!strncmp(add_command, "raw", strlen("raw"))) {
			format = FORMAT_RAW;
			goto next;
		}
		if (!strncmp(add_command, "delta", strlen("delta"))) {
			format = FORMAT_DELTA;
			goto next;
		}
		if (!strncmp(add_command, "percent", strlen("percent"))) {
			format = FORMAT_PERCENT;
			goto next;
		}

		/* Fallback: treat the token as the user-chosen column name. */
		BUILD_BUG_ON(ARRAY_SIZE(name_buffer) <= 18);
		if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) {
			char *eos;

			eos = strchr(name_buffer, ',');
			if (eos)
				*eos = '\0';
			goto next;
		}

next:
		add_command = strchr(add_command, ',');
		if (add_command) {
			*add_command = '\0';
			add_command++;
		}

	}
	if ((msr_num == 0) && (path == NULL) && (perf_device[0] == '\0' || perf_event[0] == '\0')) {
		fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter | perf/device/event) required\n");
		fail++;
	}

	/* Test for non-empty perf_device and perf_event */
	const bool is_perf_counter = perf_device[0] && perf_event[0];

	/* generate default column header */
	if (*name_buffer == '\0') {
		if (is_perf_counter) {
			snprintf(name_buffer, ARRAY_SIZE(name_buffer), "perf/%s", perf_event);
		} else {
			if (width == 32)
				sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
			else
				sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
		}
	}

	if (is_perf_counter) {
		if (add_perf_counter(perf_device, perf_event, name_buffer, width, scope, type, format))
			fail++;
	} else {
		if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0, 0))
			fail++;
	}

	if (fail) {
		help();
		exit(1);
	}
}
9929
/* Return true when "str" begins with "prefix" (empty prefix always matches). */
bool starts_with(const char *str, const char *prefix)
{
	size_t prefix_len = strlen(prefix);

	return strncmp(str, prefix, prefix_len) == 0;
}
9934
pmt_parse_from_path(const char * target_path,unsigned int * out_guid,unsigned int * out_seq)9935 int pmt_parse_from_path(const char *target_path, unsigned int *out_guid, unsigned int *out_seq)
9936 {
9937 struct pmt_diriter_t pmt_iter;
9938 const struct dirent *dirname;
9939 struct stat stat, target_stat;
9940 int fd_telem_dir = -1;
9941 int fd_target_dir;
9942 unsigned int seq = 0;
9943 unsigned long guid, target_guid;
9944 int ret = -1;
9945
9946 fd_target_dir = open(target_path, O_RDONLY | O_DIRECTORY);
9947 if (fd_target_dir == -1) {
9948 return -1;
9949 }
9950
9951 if (fstat(fd_target_dir, &target_stat) == -1) {
9952 fprintf(stderr, "%s: Failed to stat the target: %s", __func__, strerror(errno));
9953 exit(1);
9954 }
9955
9956 if (parse_telem_info_file(fd_target_dir, "guid", "%lx", &target_guid)) {
9957 fprintf(stderr, "%s: Failed to parse the target guid file: %s", __func__, strerror(errno));
9958 exit(1);
9959 }
9960
9961 close(fd_target_dir);
9962
9963 pmt_diriter_init(&pmt_iter);
9964
9965 for (dirname = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH); dirname != NULL;
9966 dirname = pmt_diriter_next(&pmt_iter)) {
9967
9968 fd_telem_dir = openat(dirfd(pmt_iter.dir), dirname->d_name, O_RDONLY | O_DIRECTORY);
9969 if (fd_telem_dir == -1)
9970 continue;
9971
9972 if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) {
9973 fprintf(stderr, "%s: Failed to parse the guid file: %s", __func__, strerror(errno));
9974 continue;
9975 }
9976
9977 if (fstat(fd_telem_dir, &stat) == -1) {
9978 fprintf(stderr, "%s: Failed to stat %s directory: %s", __func__,
9979 dirname->d_name, strerror(errno));
9980 continue;
9981 }
9982
9983 /*
9984 * If reached the same directory as target, exit the loop.
9985 * Seq has the correct value now.
9986 */
9987 if (stat.st_dev == target_stat.st_dev && stat.st_ino == target_stat.st_ino) {
9988 ret = 0;
9989 break;
9990 }
9991
9992 /*
9993 * If reached directory with the same guid,
9994 * but it's not the target directory yet,
9995 * increment seq and continue the search.
9996 */
9997 if (guid == target_guid)
9998 ++seq;
9999
10000 close(fd_telem_dir);
10001 fd_telem_dir = -1;
10002 }
10003
10004 pmt_diriter_remove(&pmt_iter);
10005
10006 if (fd_telem_dir != -1)
10007 close(fd_telem_dir);
10008
10009 if (!ret) {
10010 *out_guid = target_guid;
10011 *out_seq = seq;
10012 }
10013
10014 return ret;
10015 }
10016
parse_add_command_pmt(char * add_command)10017 void parse_add_command_pmt(char *add_command)
10018 {
10019 char *name = NULL;
10020 char *type_name = NULL;
10021 char *format_name = NULL;
10022 char *direct_path = NULL;
10023 static const char direct_path_prefix[] = "path=";
10024 unsigned int offset;
10025 unsigned int lsb;
10026 unsigned int msb;
10027 unsigned int guid;
10028 unsigned int seq = 0; /* By default, pick first file in a sequence with a given GUID. */
10029 unsigned int domain_id;
10030 enum counter_scope scope = 0;
10031 enum pmt_datatype type = PMT_TYPE_RAW;
10032 enum counter_format format = FORMAT_RAW;
10033 bool has_offset = false;
10034 bool has_lsb = false;
10035 bool has_msb = false;
10036 bool has_format = true; /* Format has a default value. */
10037 bool has_guid = false;
10038 bool has_scope = false;
10039 bool has_type = true; /* Type has a default value. */
10040
10041 /* Consume the "pmt," prefix. */
10042 add_command = strchr(add_command, ',');
10043 if (!add_command) {
10044 help();
10045 exit(1);
10046 }
10047 ++add_command;
10048
10049 while (add_command) {
10050 if (starts_with(add_command, "name=")) {
10051 name = add_command + strlen("name=");
10052 goto next;
10053 }
10054
10055 if (starts_with(add_command, "type=")) {
10056 type_name = add_command + strlen("type=");
10057 goto next;
10058 }
10059
10060 if (starts_with(add_command, "domain=")) {
10061 const size_t prefix_len = strlen("domain=");
10062
10063 if (sscanf(add_command + prefix_len, "cpu%u", &domain_id) == 1) {
10064 scope = SCOPE_CPU;
10065 has_scope = true;
10066 } else if (sscanf(add_command + prefix_len, "core%u", &domain_id) == 1) {
10067 scope = SCOPE_CORE;
10068 has_scope = true;
10069 } else if (sscanf(add_command + prefix_len, "package%u", &domain_id) == 1) {
10070 scope = SCOPE_PACKAGE;
10071 has_scope = true;
10072 }
10073
10074 if (!has_scope) {
10075 printf("%s: invalid value for scope. Expected cpu%%u, core%%u or package%%u.\n",
10076 __func__);
10077 exit(1);
10078 }
10079
10080 goto next;
10081 }
10082
10083 if (starts_with(add_command, "format=")) {
10084 format_name = add_command + strlen("format=");
10085 goto next;
10086 }
10087
10088 if (sscanf(add_command, "offset=%u", &offset) == 1) {
10089 has_offset = true;
10090 goto next;
10091 }
10092
10093 if (sscanf(add_command, "lsb=%u", &lsb) == 1) {
10094 has_lsb = true;
10095 goto next;
10096 }
10097
10098 if (sscanf(add_command, "msb=%u", &msb) == 1) {
10099 has_msb = true;
10100 goto next;
10101 }
10102
10103 if (sscanf(add_command, "guid=%x", &guid) == 1) {
10104 has_guid = true;
10105 goto next;
10106 }
10107
10108 if (sscanf(add_command, "seq=%x", &seq) == 1)
10109 goto next;
10110
10111 if (strncmp(add_command, direct_path_prefix, strlen(direct_path_prefix)) == 0) {
10112 direct_path = add_command + strlen(direct_path_prefix);
10113 goto next;
10114 }
10115 next:
10116 add_command = strchr(add_command, ',');
10117 if (add_command) {
10118 *add_command = '\0';
10119 add_command++;
10120 }
10121 }
10122
10123 if (!name) {
10124 printf("%s: missing %s\n", __func__, "name");
10125 exit(1);
10126 }
10127
10128 if (strlen(name) >= PMT_COUNTER_NAME_SIZE_BYTES) {
10129 printf("%s: name has to be at most %d characters long\n", __func__, PMT_COUNTER_NAME_SIZE_BYTES);
10130 exit(1);
10131 }
10132
10133 if (format_name) {
10134 has_format = false;
10135
10136 if (strcmp("raw", format_name) == 0) {
10137 format = FORMAT_RAW;
10138 has_format = true;
10139 }
10140
10141 if (strcmp("delta", format_name) == 0) {
10142 format = FORMAT_DELTA;
10143 has_format = true;
10144 }
10145
10146 if (!has_format) {
10147 fprintf(stderr, "%s: Invalid format %s. Expected raw or delta\n", __func__, format_name);
10148 exit(1);
10149 }
10150 }
10151
10152 if (type_name) {
10153 has_type = false;
10154
10155 if (strcmp("raw", type_name) == 0) {
10156 type = PMT_TYPE_RAW;
10157 has_type = true;
10158 }
10159
10160 if (strcmp("txtal_time", type_name) == 0) {
10161 type = PMT_TYPE_XTAL_TIME;
10162 has_type = true;
10163 }
10164
10165 if (strcmp("tcore_clock", type_name) == 0) {
10166 type = PMT_TYPE_TCORE_CLOCK;
10167 has_type = true;
10168 }
10169
10170 if (!has_type) {
10171 printf("%s: invalid %s: %s\n", __func__, "type", type_name);
10172 exit(1);
10173 }
10174 }
10175
10176 if (!has_offset) {
10177 printf("%s : missing %s\n", __func__, "offset");
10178 exit(1);
10179 }
10180
10181 if (!has_lsb) {
10182 printf("%s: missing %s\n", __func__, "lsb");
10183 exit(1);
10184 }
10185
10186 if (!has_msb) {
10187 printf("%s: missing %s\n", __func__, "msb");
10188 exit(1);
10189 }
10190
10191 if (direct_path && has_guid) {
10192 printf("%s: path and guid+seq parameters are mutually exclusive\n"
10193 "notice: passed guid=0x%x and path=%s\n", __func__, guid, direct_path);
10194 exit(1);
10195 }
10196
10197 if (direct_path) {
10198 if (pmt_parse_from_path(direct_path, &guid, &seq)) {
10199 printf("%s: failed to parse PMT file from %s\n", __func__, direct_path);
10200 exit(1);
10201 }
10202
10203 /* GUID was just infered from the direct path. */
10204 has_guid = true;
10205 }
10206
10207 if (!has_guid) {
10208 printf("%s: missing %s\n", __func__, "guid or path");
10209 exit(1);
10210 }
10211
10212 if (!has_scope) {
10213 printf("%s: missing %s\n", __func__, "scope");
10214 exit(1);
10215 }
10216
10217 if (lsb > msb) {
10218 printf("%s: lsb > msb doesn't make sense\n", __func__);
10219 exit(1);
10220 }
10221
10222 pmt_add_counter(guid, seq, name, type, lsb, msb, offset, scope, format, domain_id, PMT_OPEN_REQUIRED);
10223 }
10224
/* Dispatch an --add argument to the PMT or MSR parser based on its prefix. */
void parse_add_command(char *add_command)
{
	static const char pmt_prefix[] = "pmt";

	if (strncmp(add_command, pmt_prefix, sizeof(pmt_prefix) - 1) != 0)
		parse_add_command_msr(add_command);
	else
		parse_add_command_pmt(add_command);
}
10231
is_deferred_add(char * name)10232 int is_deferred_add(char *name)
10233 {
10234 int i;
10235
10236 for (i = 0; i < deferred_add_index; ++i)
10237 if (!strcmp(name, deferred_add_names[i]))
10238 return 1;
10239 return 0;
10240 }
10241
is_deferred_skip(char * name)10242 int is_deferred_skip(char *name)
10243 {
10244 int i;
10245
10246 for (i = 0; i < deferred_skip_index; ++i)
10247 if (!strcmp(name, deferred_skip_names[i]))
10248 return 1;
10249 return 0;
10250 }
10251
probe_sysfs(void)10252 void probe_sysfs(void)
10253 {
10254 char path[64];
10255 char name_buf[16];
10256 FILE *input;
10257 int state;
10258 char *sp;
10259
10260 for (state = 10; state >= 0; --state) {
10261
10262 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
10263 input = fopen(path, "r");
10264 if (input == NULL)
10265 continue;
10266 if (!fgets(name_buf, sizeof(name_buf), input))
10267 err(1, "%s: failed to read file", path);
10268
10269 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
10270 sp = strchr(name_buf, '-');
10271 if (!sp)
10272 sp = strchrnul(name_buf, '\n');
10273 *sp = '%';
10274 *(sp + 1) = '\0';
10275
10276 remove_underbar(name_buf);
10277
10278 fclose(input);
10279
10280 sprintf(path, "cpuidle/state%d/time", state);
10281
10282 if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
10283 continue;
10284
10285 if (is_deferred_skip(name_buf))
10286 continue;
10287
10288 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0);
10289 }
10290
10291 for (state = 10; state >= 0; --state) {
10292
10293 sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
10294 input = fopen(path, "r");
10295 if (input == NULL)
10296 continue;
10297 if (!fgets(name_buf, sizeof(name_buf), input))
10298 err(1, "%s: failed to read file", path);
10299 /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
10300 sp = strchr(name_buf, '-');
10301 if (!sp)
10302 sp = strchrnul(name_buf, '\n');
10303 *sp = '\0';
10304 fclose(input);
10305
10306 remove_underbar(name_buf);
10307
10308 sprintf(path, "cpuidle/state%d/usage", state);
10309
10310 if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
10311 continue;
10312
10313 if (is_deferred_skip(name_buf))
10314 continue;
10315
10316 add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0);
10317 }
10318
10319 }
10320
10321 /*
10322 * parse cpuset with following syntax
10323 * 1,2,4..6,8-10 and set bits in cpu_subset
10324 */
parse_cpu_command(char * optarg)10325 void parse_cpu_command(char *optarg)
10326 {
10327 if (!strcmp(optarg, "core")) {
10328 if (cpu_subset)
10329 goto error;
10330 show_core_only++;
10331 return;
10332 }
10333 if (!strcmp(optarg, "package")) {
10334 if (cpu_subset)
10335 goto error;
10336 show_pkg_only++;
10337 return;
10338 }
10339 if (show_core_only || show_pkg_only)
10340 goto error;
10341
10342 cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
10343 if (cpu_subset == NULL)
10344 err(3, "CPU_ALLOC");
10345 cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);
10346
10347 CPU_ZERO_S(cpu_subset_size, cpu_subset);
10348
10349 if (parse_cpu_str(optarg, cpu_subset, cpu_subset_size))
10350 goto error;
10351
10352 return;
10353
10354 error:
10355 fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
10356 help();
10357 exit(-1);
10358 }
10359
/*
 * Parse the command line.
 *
 * Runs two getopt passes: the first picks up only --no-msr ('M') and
 * --no-perf ('P'), because they invalidate combinations handled in the
 * second pass (e.g. --add of an MSR counter together with --no-msr);
 * the second pass handles every option for real.
 */
void cmdline(int argc, char **argv)
{
	int opt;
	int option_index = 0;
	static struct option long_options[] = {
		{ "add", required_argument, 0, 'a' },
		{ "cpu", required_argument, 0, 'c' },
		{ "Dump", no_argument, 0, 'D' },
		{ "debug", no_argument, 0, 'd' },	/* internal, not documented */
		{ "enable", required_argument, 0, 'e' },
		{ "force", no_argument, 0, 'f' },
		{ "interval", required_argument, 0, 'i' },
		{ "IPC", no_argument, 0, 'I' },
		{ "num_iterations", required_argument, 0, 'n' },
		{ "header_iterations", required_argument, 0, 'N' },
		{ "help", no_argument, 0, 'h' },
		{ "hide", required_argument, 0, 'H' },	// meh, -h taken by --help
		{ "Joules", no_argument, 0, 'J' },
		{ "list", no_argument, 0, 'l' },
		{ "out", required_argument, 0, 'o' },
		{ "quiet", no_argument, 0, 'q' },
		{ "no-msr", no_argument, 0, 'M' },
		{ "no-perf", no_argument, 0, 'P' },
		{ "show", required_argument, 0, 's' },
		{ "Summary", no_argument, 0, 'S' },
		{ "TCC", required_argument, 0, 'T' },
		{ "version", no_argument, 0, 'v' },
		{ 0, 0, 0, 0 }
	};

	progname = argv[0];

	/*
	 * Parse some options early, because they may make other options invalid,
	 * like adding the MSR counter with --add and at the same time using --no-msr.
	 */
	while ((opt = getopt_long_only(argc, argv, "+MPn:", long_options, &option_index)) != -1) {
		switch (opt) {
		case 'M':
			no_msr = 1;
			break;
		case 'P':
			no_perf = 1;
			break;
		default:
			/* everything else is handled in the second pass */
			break;
		}
	}
	/* rewind getopt state so the second pass re-scans argv from the start */
	optind = 0;

	/*
	 * NOTE(review): the optstring lists "C:" but no 'C' case exists below,
	 * and 'a' (--add) is reachable only as a long option — presumably
	 * intentional legacy; confirm before changing.
	 */
	while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qMST:v", long_options, &option_index)) != -1) {
		switch (opt) {
		case 'a':
			parse_add_command(optarg);
			break;
		case 'c':
			parse_cpu_command(optarg);
			break;
		case 'D':
			dump_only++;
			/*
			 * Force the no_perf early to prevent using it as a source.
			 * User asks for raw values, but perf returns them relative
			 * to the opening of the file descriptor.
			 */
			no_perf = 1;
			break;
		case 'e':
			/* --enable specified counter */
			bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
			break;
		case 'f':
			force_load++;
			break;
		case 'd':
			debug++;
			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
			break;
		case 'H':
			/*
			 * --hide: do not show those specified
			 * multiple invocations simply clear more bits in enabled mask
			 */
			bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);
			break;
		case 'h':
		default:
			help();
			exit(1);
		case 'i':
			{
				/* --interval: fractional seconds, split into sec + usec/nsec parts */
				double interval = strtod(optarg, NULL);

				if (interval < 0.001) {
					fprintf(outf, "interval %f seconds is too small\n", interval);
					exit(2);
				}

				interval_tv.tv_sec = interval_ts.tv_sec = interval;
				interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
				interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
			}
			break;
		case 'J':
			rapl_joules++;
			break;
		case 'l':
			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
			list_header_only++;
			quiet++;
			break;
		case 'o':
			outf = fopen_or_die(optarg, "w");
			break;
		case 'q':
			quiet = 1;
			break;
		case 'M':
		case 'P':
			/* Parsed earlier */
			break;
		case 'n':
			num_iterations = strtod(optarg, NULL);

			if (num_iterations <= 0) {
				fprintf(outf, "iterations %d should be positive number\n", num_iterations);
				exit(2);
			}
			break;
		case 'N':
			header_iterations = strtod(optarg, NULL);

			if (header_iterations <= 0) {
				fprintf(outf, "iterations %d should be positive number\n", header_iterations);
				exit(2);
			}
			break;
		case 's':
			/*
			 * --show: show only those specified
			 * The 1st invocation will clear and replace the enabled mask
			 * subsequent invocations can add to it.
			 */
			if (shown == 0)
				bic_enabled = bic_lookup(optarg, SHOW_LIST);
			else
				bic_enabled |= bic_lookup(optarg, SHOW_LIST);
			shown = 1;
			break;
		case 'S':
			summary_only++;
			break;
		case 'T':
			tj_max_override = atoi(optarg);
			break;
		case 'v':
			print_version();
			exit(0);
			break;
		}
	}
}
10522
set_rlimit(void)10523 void set_rlimit(void)
10524 {
10525 struct rlimit limit;
10526
10527 if (getrlimit(RLIMIT_NOFILE, &limit) < 0)
10528 err(1, "Failed to get rlimit");
10529
10530 if (limit.rlim_max < MAX_NOFILE)
10531 limit.rlim_max = MAX_NOFILE;
10532 if (limit.rlim_cur < MAX_NOFILE)
10533 limit.rlim_cur = MAX_NOFILE;
10534
10535 if (setrlimit(RLIMIT_NOFILE, &limit) < 0)
10536 err(1, "Failed to set rlimit");
10537 }
10538
int main(int argc, char **argv)
{
	int fd, ret;

	/*
	 * Best-effort: move this process into the root cgroup so that
	 * cgroup CPU restrictions do not skew measurements.  Failure to
	 * open or write is non-fatal.
	 */
	fd = open("/sys/fs/cgroup/cgroup.procs", O_WRONLY);
	if (fd < 0)
		goto skip_cgroup_setting;

	ret = write(fd, "0\n", 2);
	if (ret == -1)
		perror("Can't update cgroup\n");

	close(fd);

skip_cgroup_setting:
	/* default output goes to stderr; cmdline() may redirect via --out */
	outf = stderr;
	cmdline(argc, argv);

	if (!quiet) {
		print_version();
		print_bootcmd();
	}

	probe_sysfs();

	/* only root can raise rlimits */
	if (!getuid())
		set_rlimit();

	turbostat_init();

	if (!no_msr)
		msr_sum_record();

	/* dump counters and exit */
	if (dump_only)
		return get_and_dump_counters();

	/* list header and exit */
	if (list_header_only) {
		print_header(",");
		flush_output_stdout();
		return 0;
	}

	/*
	 * if any params left, it must be a command to fork
	 */
	if (argc - optind)
		return fork_it(argv + optind);
	else
		turbostat_loop();

	return 0;
}
10593