1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * User interface for Resource Allocation in Resource Director Technology (RDT)
4 *
5 * Copyright (C) 2016 Intel Corporation
6 *
7 * Author: Fenghua Yu <[email protected]>
8 *
9 * More information about RDT can be found in the Intel (R) x86 Architecture
10 * Software Developer's Manual.
11 */
12
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
15 #include <linux/cpu.h>
16 #include <linux/debugfs.h>
17 #include <linux/fs.h>
18 #include <linux/fs_parser.h>
19 #include <linux/sysfs.h>
20 #include <linux/kernfs.h>
21 #include <linux/seq_buf.h>
22 #include <linux/seq_file.h>
23 #include <linux/sched/signal.h>
24 #include <linux/sched/task.h>
25 #include <linux/slab.h>
26 #include <linux/task_work.h>
27 #include <linux/user_namespace.h>
28
29 #include <uapi/linux/magic.h>
30
31 #include <asm/resctrl.h>
32 #include "internal.h"
33
34 DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
35 DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
36 DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
37
38 /* Mutex to protect rdtgroup access. */
39 DEFINE_MUTEX(rdtgroup_mutex);
40
41 static struct kernfs_root *rdt_root;
42 struct rdtgroup rdtgroup_default;
43 LIST_HEAD(rdt_all_groups);
44
45 /* list of entries for the schemata file */
46 LIST_HEAD(resctrl_schema_all);
47
48 /* The filesystem can only be mounted once. */
49 bool resctrl_mounted;
50
51 /* Kernel fs node for "info" directory under root */
52 static struct kernfs_node *kn_info;
53
54 /* Kernel fs node for "mon_groups" directory under root */
55 static struct kernfs_node *kn_mongrp;
56
57 /* Kernel fs node for "mon_data" directory under root */
58 static struct kernfs_node *kn_mondata;
59
60 static struct seq_buf last_cmd_status;
61 static char last_cmd_status_buf[512];
62
63 static int rdtgroup_setup_root(struct rdt_fs_context *ctx);
64 static void rdtgroup_destroy_root(void);
65
66 struct dentry *debugfs_resctrl;
67
68 /*
69 * Memory bandwidth monitoring event to use for the default CTRL_MON group
70 * and each new CTRL_MON group created by the user. Only relevant when
71 * the filesystem is mounted with the "mba_MBps" option so it does not
72 * matter that it remains uninitialized on systems that do not support
73 * the "mba_MBps" option.
74 */
75 enum resctrl_event_id mba_mbps_default_event;
76
77 static bool resctrl_debug;
78
79 void rdt_last_cmd_clear(void)
80 {
81 lockdep_assert_held(&rdtgroup_mutex);
82 seq_buf_clear(&last_cmd_status);
83 }
84
85 void rdt_last_cmd_puts(const char *s)
86 {
87 lockdep_assert_held(&rdtgroup_mutex);
88 seq_buf_puts(&last_cmd_status, s);
89 }
90
91 void rdt_last_cmd_printf(const char *fmt, ...)
92 {
93 va_list ap;
94
95 va_start(ap, fmt);
96 lockdep_assert_held(&rdtgroup_mutex);
97 seq_buf_vprintf(&last_cmd_status, fmt, ap);
98 va_end(ap);
99 }
100
101 void rdt_staged_configs_clear(void)
102 {
103 struct rdt_ctrl_domain *dom;
104 struct rdt_resource *r;
105
106 lockdep_assert_held(&rdtgroup_mutex);
107
108 for_each_alloc_capable_rdt_resource(r) {
109 list_for_each_entry(dom, &r->ctrl_domains, hdr.list)
110 memset(dom->staged_config, 0, sizeof(dom->staged_config));
111 }
112 }
113
114 /*
115 * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
116 * we can keep a bitmap of free CLOSIDs in a single integer.
117 *
118 * Using a global CLOSID across all resources has some advantages and
119 * some drawbacks:
120 * + We can simply set current's closid to assign a task to a resource
121 * group.
122 * + Context switch code can avoid extra memory references deciding which
123 * CLOSID to load into the PQR_ASSOC MSR
124 * - We give up some options in configuring resource groups across multi-socket
125 * systems.
126 * - Our choices on how to configure each resource become progressively more
127 * limited as the number of resources grows.
128 */
129 static unsigned long closid_free_map;
130 static int closid_free_map_len;
131
132 int closids_supported(void)
133 {
134 return closid_free_map_len;
135 }
136
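/*
 * Initialize the CLOSID free map from the smallest number of CLOSIDs
 * supported by any schema, since a group's CLOSID must be valid for
 * every alloc capable resource.
 */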
137 static void closid_init(void)
138 {
139 struct resctrl_schema *s;
140 u32 rdt_min_closid = 32;
141
142 /* Compute rdt_min_closid across all resources */
143 list_for_each_entry(s, &resctrl_schema_all, list)
144 rdt_min_closid = min(rdt_min_closid, s->num_closid);
145
146 closid_free_map = BIT_MASK(rdt_min_closid) - 1;
147
148 /* RESCTRL_RESERVED_CLOSID is always reserved for the default group */
149 __clear_bit(RESCTRL_RESERVED_CLOSID, &closid_free_map);
150 closid_free_map_len = rdt_min_closid;
151 }
152
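/*
 * Allocate a free CLOSID. When limbo RMIDs depend on the CLOSID, pick the
 * CLOSID with the fewest dirty cache lines still associated with it;
 * otherwise take the lowest free bit. Returns a negative errno if no
 * CLOSID is available.
 */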
153 static int closid_alloc(void)
154 {
155 int cleanest_closid;
156 u32 closid;
157
158 lockdep_assert_held(&rdtgroup_mutex);
159
160 if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID) &&
161 is_llc_occupancy_enabled()) {
162 cleanest_closid = resctrl_find_cleanest_closid();
163 if (cleanest_closid < 0)
164 return cleanest_closid;
165 closid = cleanest_closid;
166 } else {
167 closid = ffs(closid_free_map);
168 if (closid == 0)
169 return -ENOSPC;
170 closid--;
171 }
172 __clear_bit(closid, &closid_free_map);
173
174 return closid;
175 }
176
177 void closid_free(int closid)
178 {
179 lockdep_assert_held(&rdtgroup_mutex);
180
181 __set_bit(closid, &closid_free_map);
182 }
183
184 /**
185 * closid_allocated - test if provided closid is in use
186 * @closid: closid to be tested
187 *
188 * Return: true if @closid is currently associated with a resource group,
189 * false if @closid is free
190 */
191 bool closid_allocated(unsigned int closid)
192 {
193 lockdep_assert_held(&rdtgroup_mutex);
194
195 return !test_bit(closid, &closid_free_map);
196 }
197
198 /**
199 * rdtgroup_mode_by_closid - Return mode of resource group with closid
200 * @closid: closid of the resource group
201 *
202 * Each resource group is associated with a @closid. Here the mode
203 * of a resource group can be queried by searching for it using its closid.
204 *
205 * Return: mode as &enum rdtgrp_mode of resource group with closid @closid
206 */
207 enum rdtgrp_mode rdtgroup_mode_by_closid(int closid)
208 {
209 struct rdtgroup *rdtgrp;
210
211 list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
212 if (rdtgrp->closid == closid)
213 return rdtgrp->mode;
214 }
215
216 return RDT_NUM_MODES;
217 }
218
219 static const char * const rdt_mode_str[] = {
220 [RDT_MODE_SHAREABLE] = "shareable",
221 [RDT_MODE_EXCLUSIVE] = "exclusive",
222 [RDT_MODE_PSEUDO_LOCKSETUP] = "pseudo-locksetup",
223 [RDT_MODE_PSEUDO_LOCKED] = "pseudo-locked",
224 };
225
226 /**
227 * rdtgroup_mode_str - Return the string representation of mode
228 * @mode: the resource group mode as &enum rdtgroup_mode
229 *
230 * Return: string representation of valid mode, "unknown" otherwise
231 */
232 static const char *rdtgroup_mode_str(enum rdtgrp_mode mode)
233 {
234 if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES)
235 return "unknown";
236
237 return rdt_mode_str[mode];
238 }
239
240 /* set uid and gid of rdtgroup dirs and files to that of the creator */
241 static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
242 {
243 struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
244 .ia_uid = current_fsuid(),
245 .ia_gid = current_fsgid(), };
246
247 if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
248 gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
249 return 0;
250
251 return kernfs_setattr(kn, &iattr);
252 }
253
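/*
 * Create the kernfs file described by @rft under @parent_kn and make its
 * uid/gid match the creating user's credentials.
 */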
254 static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
255 {
256 struct kernfs_node *kn;
257 int ret;
258
259 kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
260 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
261 0, rft->kf_ops, rft, NULL, NULL);
262 if (IS_ERR(kn))
263 return PTR_ERR(kn);
264
265 ret = rdtgroup_kn_set_ugid(kn);
266 if (ret) {
267 kernfs_remove(kn);
268 return ret;
269 }
270
271 return 0;
272 }
273
274 static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
275 {
276 struct kernfs_open_file *of = m->private;
277 struct rftype *rft = of->kn->priv;
278
279 if (rft->seq_show)
280 return rft->seq_show(of, m, arg);
281 return 0;
282 }
283
284 static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
285 size_t nbytes, loff_t off)
286 {
287 struct rftype *rft = of->kn->priv;
288
289 if (rft->write)
290 return rft->write(of, buf, nbytes, off);
291
292 return -EINVAL;
293 }
294
295 static const struct kernfs_ops rdtgroup_kf_single_ops = {
296 .atomic_write_len = PAGE_SIZE,
297 .write = rdtgroup_file_write,
298 .seq_show = rdtgroup_seqfile_show,
299 };
300
301 static const struct kernfs_ops kf_mondata_ops = {
302 .atomic_write_len = PAGE_SIZE,
303 .seq_show = rdtgroup_mondata_show,
304 };
305
306 static bool is_cpu_list(struct kernfs_open_file *of)
307 {
308 struct rftype *rft = of->kn->priv;
309
310 return rft->flags & RFTYPE_FLAGS_CPUS_LIST;
311 }
312
313 static int rdtgroup_cpus_show(struct kernfs_open_file *of,
314 struct seq_file *s, void *v)
315 {
316 struct rdtgroup *rdtgrp;
317 struct cpumask *mask;
318 int ret = 0;
319
320 rdtgrp = rdtgroup_kn_lock_live(of->kn);
321
322 if (rdtgrp) {
323 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
324 if (!rdtgrp->plr->d) {
325 rdt_last_cmd_clear();
326 rdt_last_cmd_puts("Cache domain offline\n");
327 ret = -ENODEV;
328 } else {
329 mask = &rdtgrp->plr->d->hdr.cpu_mask;
330 seq_printf(s, is_cpu_list(of) ?
331 "%*pbl\n" : "%*pb\n",
332 cpumask_pr_args(mask));
333 }
334 } else {
335 seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
336 cpumask_pr_args(&rdtgrp->cpu_mask));
337 }
338 } else {
339 ret = -ENOENT;
340 }
341 rdtgroup_kn_unlock(of->kn);
342
343 return ret;
344 }
345
346 /*
347 * This is safe against resctrl_sched_in() called from __switch_to()
348 * because __switch_to() is executed with interrupts disabled. A local call
349 * from update_closid_rmid() is protected against __switch_to() because
350 * preemption is disabled.
351 */
352 static void update_cpu_closid_rmid(void *info)
353 {
354 struct rdtgroup *r = info;
355
356 if (r) {
357 this_cpu_write(pqr_state.default_closid, r->closid);
358 this_cpu_write(pqr_state.default_rmid, r->mon.rmid);
359 }
360
361 /*
362 * We cannot unconditionally write the MSR because the current
363 * executing task might have its own closid selected. Just reuse
364 * the context switch code.
365 */
366 resctrl_sched_in(current);
367 }
368
369 /*
370 * Update the PQR_ASSOC MSR on all cpus in @cpu_mask.
371 *
372 * Per task closids/rmids must have been set up before calling this function.
373 */
374 static void
375 update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
376 {
377 on_each_cpu_mask(cpu_mask, update_cpu_closid_rmid, r, 1);
378 }
379
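/*
 * Apply a new CPU mask to a monitor group: the CPUs must be a subset of the
 * parent control group's CPUs, CPUs dropped from this group are given back
 * to the parent, and CPUs added are removed from sibling monitor groups.
 */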
380 static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
381 cpumask_var_t tmpmask)
382 {
383 struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp;
384 struct list_head *head;
385
386 /* Check whether cpus belong to parent ctrl group */
387 cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
388 if (!cpumask_empty(tmpmask)) {
389 rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n");
390 return -EINVAL;
391 }
392
393 /* Check whether cpus are dropped from this group */
394 cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
395 if (!cpumask_empty(tmpmask)) {
396 /* Give any dropped cpus to parent rdtgroup */
397 cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
398 update_closid_rmid(tmpmask, prgrp);
399 }
400
401 /*
402 * If we added cpus, remove them from previous group that owned them
403 * and update per-cpu rmid
404 */
405 cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
406 if (!cpumask_empty(tmpmask)) {
407 head = &prgrp->mon.crdtgrp_list;
408 list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
409 if (crgrp == rdtgrp)
410 continue;
411 cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask,
412 tmpmask);
413 }
414 update_closid_rmid(tmpmask, rdtgrp);
415 }
416
417 /* Done pushing/pulling - update this group with new mask */
418 cpumask_copy(&rdtgrp->cpu_mask, newmask);
419
420 return 0;
421 }
422
423 static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
424 {
425 struct rdtgroup *crgrp;
426
427 cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
428 /* update the child mon group masks as well */
429 list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list)
430 cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask);
431 }
432
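/*
 * Apply a new CPU mask to a control group: CPUs dropped from this group are
 * given back to the default group, CPUs added are taken from whichever group
 * currently owns them, and the child monitor group masks are cleared.
 */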
433 static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
434 cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
435 {
436 struct rdtgroup *r, *crgrp;
437 struct list_head *head;
438
439 /* Check whether cpus are dropped from this group */
440 cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
441 if (!cpumask_empty(tmpmask)) {
442 /* Can't drop from default group */
443 if (rdtgrp == &rdtgroup_default) {
444 rdt_last_cmd_puts("Can't drop CPUs from default group\n");
445 return -EINVAL;
446 }
447
448 /* Give any dropped cpus to rdtgroup_default */
449 cpumask_or(&rdtgroup_default.cpu_mask,
450 &rdtgroup_default.cpu_mask, tmpmask);
451 update_closid_rmid(tmpmask, &rdtgroup_default);
452 }
453
454 /*
455 * If we added cpus, remove them from previous group and
456 * the prev group's child groups that owned them
457 * and update per-cpu closid/rmid.
458 */
459 cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
460 if (!cpumask_empty(tmpmask)) {
461 list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
462 if (r == rdtgrp)
463 continue;
464 cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
465 if (!cpumask_empty(tmpmask1))
466 cpumask_rdtgrp_clear(r, tmpmask1);
467 }
468 update_closid_rmid(tmpmask, rdtgrp);
469 }
470
471 /* Done pushing/pulling - update this group with new mask */
472 cpumask_copy(&rdtgrp->cpu_mask, newmask);
473
474 /*
475 * Clear child mon group masks since there is a new parent mask
476 * now and update the rmid for the cpus the child lost.
477 */
478 head = &rdtgrp->mon.crdtgrp_list;
479 list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
480 cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask);
481 update_closid_rmid(tmpmask, rdtgrp);
482 cpumask_clear(&crgrp->cpu_mask);
483 }
484
485 return 0;
486 }
487
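/*
 * Handle writes to the "cpus"/"cpus_list" files: parse the user supplied
 * mask or list, reject offline CPUs, then hand off to the control or
 * monitor group helper.
 */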
488 static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
489 char *buf, size_t nbytes, loff_t off)
490 {
491 cpumask_var_t tmpmask, newmask, tmpmask1;
492 struct rdtgroup *rdtgrp;
493 int ret;
494
495 if (!buf)
496 return -EINVAL;
497
498 if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
499 return -ENOMEM;
500 if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
501 free_cpumask_var(tmpmask);
502 return -ENOMEM;
503 }
504 if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
505 free_cpumask_var(tmpmask);
506 free_cpumask_var(newmask);
507 return -ENOMEM;
508 }
509
510 rdtgrp = rdtgroup_kn_lock_live(of->kn);
511 if (!rdtgrp) {
512 ret = -ENOENT;
513 goto unlock;
514 }
515
516 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
517 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
518 ret = -EINVAL;
519 rdt_last_cmd_puts("Pseudo-locking in progress\n");
520 goto unlock;
521 }
522
523 if (is_cpu_list(of))
524 ret = cpulist_parse(buf, newmask);
525 else
526 ret = cpumask_parse(buf, newmask);
527
528 if (ret) {
529 rdt_last_cmd_puts("Bad CPU list/mask\n");
530 goto unlock;
531 }
532
533 /* check that user didn't specify any offline cpus */
534 cpumask_andnot(tmpmask, newmask, cpu_online_mask);
535 if (!cpumask_empty(tmpmask)) {
536 ret = -EINVAL;
537 rdt_last_cmd_puts("Can only assign online CPUs\n");
538 goto unlock;
539 }
540
541 if (rdtgrp->type == RDTCTRL_GROUP)
542 ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
543 else if (rdtgrp->type == RDTMON_GROUP)
544 ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
545 else
546 ret = -EINVAL;
547
548 unlock:
549 rdtgroup_kn_unlock(of->kn);
550 free_cpumask_var(tmpmask);
551 free_cpumask_var(newmask);
552 free_cpumask_var(tmpmask1);
553
554 return ret ?: nbytes;
555 }
556
557 /**
558 * rdtgroup_remove - the helper to remove resource group safely
559 * @rdtgrp: resource group to remove
560 *
561 * On resource group creation via a mkdir, an extra kernfs_node reference is
562 * taken to ensure that the rdtgroup structure remains accessible for the
563 * rdtgroup_kn_unlock() calls where it is removed.
564 *
565 * Drop the extra reference here, then free the rdtgroup structure.
566 *
567 * Return: void
568 */
569 static void rdtgroup_remove(struct rdtgroup *rdtgrp)
570 {
571 kernfs_put(rdtgrp->kn);
572 kfree(rdtgrp);
573 }
574
575 static void _update_task_closid_rmid(void *task)
576 {
577 /*
578 * If the task is still current on this CPU, update PQR_ASSOC MSR.
579 * Otherwise, the MSR is updated when the task is scheduled in.
580 */
581 if (task == current)
582 resctrl_sched_in(task);
583 }
584
585 static void update_task_closid_rmid(struct task_struct *t)
586 {
587 if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
588 smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
589 else
590 _update_task_closid_rmid(t);
591 }
592
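/* Test whether @tsk already carries the closid and rmid of @rdtgrp. */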
593 static bool task_in_rdtgroup(struct task_struct *tsk, struct rdtgroup *rdtgrp)
594 {
595 u32 closid, rmid = rdtgrp->mon.rmid;
596
597 if (rdtgrp->type == RDTCTRL_GROUP)
598 closid = rdtgrp->closid;
599 else if (rdtgrp->type == RDTMON_GROUP)
600 closid = rdtgrp->mon.parent->closid;
601 else
602 return false;
603
604 return resctrl_arch_match_closid(tsk, closid) &&
605 resctrl_arch_match_rmid(tsk, closid, rmid);
606 }
607
608 static int __rdtgroup_move_task(struct task_struct *tsk,
609 struct rdtgroup *rdtgrp)
610 {
611 /* If the task is already in rdtgrp, no need to move the task. */
612 if (task_in_rdtgroup(tsk, rdtgrp))
613 return 0;
614
615 /*
616 * Set the task's closid/rmid before the PQR_ASSOC MSR can be
617 * updated by them.
618 *
619 * For ctrl_mon groups, move both closid and rmid.
620 * For monitor groups, tasks can only be moved from
621 * their parent CTRL group.
622 */
623 if (rdtgrp->type == RDTMON_GROUP &&
624 !resctrl_arch_match_closid(tsk, rdtgrp->mon.parent->closid)) {
625 rdt_last_cmd_puts("Can't move task to different control group\n");
626 return -EINVAL;
627 }
628
629 if (rdtgrp->type == RDTMON_GROUP)
630 resctrl_arch_set_closid_rmid(tsk, rdtgrp->mon.parent->closid,
631 rdtgrp->mon.rmid);
632 else
633 resctrl_arch_set_closid_rmid(tsk, rdtgrp->closid,
634 rdtgrp->mon.rmid);
635
636 /*
637 * Ensure the task's closid and rmid are written before determining if
638 * the task is current, which decides whether it will be interrupted.
639 * This pairs with the full barrier between the rq->curr update and
640 * resctrl_sched_in() during context switch.
641 */
642 smp_mb();
643
644 /*
645 * By now, the task's closid and rmid are set. If the task is current
646 * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
647 * group go into effect. If the task is not current, the MSR will be
648 * updated when the task is scheduled in.
649 */
650 update_task_closid_rmid(tsk);
651
652 return 0;
653 }
654
655 static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
656 {
657 return (resctrl_arch_alloc_capable() && (r->type == RDTCTRL_GROUP) &&
658 resctrl_arch_match_closid(t, r->closid));
659 }
660
661 static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
662 {
663 return (resctrl_arch_mon_capable() && (r->type == RDTMON_GROUP) &&
664 resctrl_arch_match_rmid(t, r->mon.parent->closid,
665 r->mon.rmid));
666 }
667
668 /**
669 * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group
670 * @r: Resource group
671 *
672 * Return: 1 if tasks have been assigned to @r, 0 otherwise
673 */
674 int rdtgroup_tasks_assigned(struct rdtgroup *r)
675 {
676 struct task_struct *p, *t;
677 int ret = 0;
678
679 lockdep_assert_held(&rdtgroup_mutex);
680
681 rcu_read_lock();
682 for_each_process_thread(p, t) {
683 if (is_closid_match(t, r) || is_rmid_match(t, r)) {
684 ret = 1;
685 break;
686 }
687 }
688 rcu_read_unlock();
689
690 return ret;
691 }
692
693 static int rdtgroup_task_write_permission(struct task_struct *task,
694 struct kernfs_open_file *of)
695 {
696 const struct cred *tcred = get_task_cred(task);
697 const struct cred *cred = current_cred();
698 int ret = 0;
699
700 /*
701 * Even if we're attaching all tasks in the thread group, we only
702 * need to check permissions on one of them.
703 */
704 if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
705 !uid_eq(cred->euid, tcred->uid) &&
706 !uid_eq(cred->euid, tcred->suid)) {
707 rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
708 ret = -EPERM;
709 }
710
711 put_cred(tcred);
712 return ret;
713 }
714
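/*
 * Look up the task for @pid (0 means the calling task), check that the
 * writer has permission to move it, then update its closid/rmid.
 */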
715 static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
716 struct kernfs_open_file *of)
717 {
718 struct task_struct *tsk;
719 int ret;
720
721 rcu_read_lock();
722 if (pid) {
723 tsk = find_task_by_vpid(pid);
724 if (!tsk) {
725 rcu_read_unlock();
726 rdt_last_cmd_printf("No task %d\n", pid);
727 return -ESRCH;
728 }
729 } else {
730 tsk = current;
731 }
732
733 get_task_struct(tsk);
734 rcu_read_unlock();
735
736 ret = rdtgroup_task_write_permission(tsk, of);
737 if (!ret)
738 ret = __rdtgroup_move_task(tsk, rdtgrp);
739
740 put_task_struct(tsk);
741 return ret;
742 }
743
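/*
 * Handle writes to the "tasks" file: parse a comma-separated list of PIDs
 * and move each task into the resource group, stopping at the first error.
 */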
744 static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
745 char *buf, size_t nbytes, loff_t off)
746 {
747 struct rdtgroup *rdtgrp;
748 char *pid_str;
749 int ret = 0;
750 pid_t pid;
751
752 rdtgrp = rdtgroup_kn_lock_live(of->kn);
753 if (!rdtgrp) {
754 rdtgroup_kn_unlock(of->kn);
755 return -ENOENT;
756 }
757 rdt_last_cmd_clear();
758
759 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
760 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
761 ret = -EINVAL;
762 rdt_last_cmd_puts("Pseudo-locking in progress\n");
763 goto unlock;
764 }
765
766 while (buf && buf[0] != '\0' && buf[0] != '\n') {
767 pid_str = strim(strsep(&buf, ","));
768
769 if (kstrtoint(pid_str, 0, &pid)) {
770 rdt_last_cmd_printf("Task list parsing error pid %s\n", pid_str);
771 ret = -EINVAL;
772 break;
773 }
774
775 if (pid < 0) {
776 rdt_last_cmd_printf("Invalid pid %d\n", pid);
777 ret = -EINVAL;
778 break;
779 }
780
781 ret = rdtgroup_move_task(pid, rdtgrp, of);
782 if (ret) {
783 rdt_last_cmd_printf("Error while processing task %d\n", pid);
784 break;
785 }
786 }
787
788 unlock:
789 rdtgroup_kn_unlock(of->kn);
790
791 return ret ?: nbytes;
792 }
793
794 static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
795 {
796 struct task_struct *p, *t;
797 pid_t pid;
798
799 rcu_read_lock();
800 for_each_process_thread(p, t) {
801 if (is_closid_match(t, r) || is_rmid_match(t, r)) {
802 pid = task_pid_vnr(t);
803 if (pid)
804 seq_printf(s, "%d\n", pid);
805 }
806 }
807 rcu_read_unlock();
808 }
809
810 static int rdtgroup_tasks_show(struct kernfs_open_file *of,
811 struct seq_file *s, void *v)
812 {
813 struct rdtgroup *rdtgrp;
814 int ret = 0;
815
816 rdtgrp = rdtgroup_kn_lock_live(of->kn);
817 if (rdtgrp)
818 show_rdt_tasks(rdtgrp, s);
819 else
820 ret = -ENOENT;
821 rdtgroup_kn_unlock(of->kn);
822
823 return ret;
824 }
825
826 static int rdtgroup_closid_show(struct kernfs_open_file *of,
827 struct seq_file *s, void *v)
828 {
829 struct rdtgroup *rdtgrp;
830 int ret = 0;
831
832 rdtgrp = rdtgroup_kn_lock_live(of->kn);
833 if (rdtgrp)
834 seq_printf(s, "%u\n", rdtgrp->closid);
835 else
836 ret = -ENOENT;
837 rdtgroup_kn_unlock(of->kn);
838
839 return ret;
840 }
841
842 static int rdtgroup_rmid_show(struct kernfs_open_file *of,
843 struct seq_file *s, void *v)
844 {
845 struct rdtgroup *rdtgrp;
846 int ret = 0;
847
848 rdtgrp = rdtgroup_kn_lock_live(of->kn);
849 if (rdtgrp)
850 seq_printf(s, "%u\n", rdtgrp->mon.rmid);
851 else
852 ret = -ENOENT;
853 rdtgroup_kn_unlock(of->kn);
854
855 return ret;
856 }
857
858 #ifdef CONFIG_PROC_CPU_RESCTRL
859
860 /*
861 * A task can only be part of one resctrl control group and of one monitor
862 * group which is associated to that control group.
863 *
864 * 1) res:
865 * mon:
866 *
867 * resctrl is not available.
868 *
869 * 2) res:/
870 * mon:
871 *
872 * Task is part of the root resctrl control group, and it is not associated
873 * to any monitor group.
874 *
875 * 3) res:/
876 * mon:mon0
877 *
878 * Task is part of the root resctrl control group and monitor group mon0.
879 *
880 * 4) res:group0
881 * mon:
882 *
883 * Task is part of resctrl control group group0, and it is not associated
884 * to any monitor group.
885 *
886 * 5) res:group0
887 * mon:mon1
888 *
889 * Task is part of resctrl control group group0 and monitor group mon1.
890 */
891 int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns,
892 struct pid *pid, struct task_struct *tsk)
893 {
894 struct rdtgroup *rdtg;
895 int ret = 0;
896
897 mutex_lock(&rdtgroup_mutex);
898
899 /* Return empty if resctrl has not been mounted. */
900 if (!resctrl_mounted) {
901 seq_puts(s, "res:\nmon:\n");
902 goto unlock;
903 }
904
905 list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) {
906 struct rdtgroup *crg;
907
908 /*
909 * Task information is only relevant for shareable
910 * and exclusive groups.
911 */
912 if (rdtg->mode != RDT_MODE_SHAREABLE &&
913 rdtg->mode != RDT_MODE_EXCLUSIVE)
914 continue;
915
916 if (!resctrl_arch_match_closid(tsk, rdtg->closid))
917 continue;
918
919 seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "",
920 rdtg->kn->name);
921 seq_puts(s, "mon:");
922 list_for_each_entry(crg, &rdtg->mon.crdtgrp_list,
923 mon.crdtgrp_list) {
924 if (!resctrl_arch_match_rmid(tsk, crg->mon.parent->closid,
925 crg->mon.rmid))
926 continue;
927 seq_printf(s, "%s", crg->kn->name);
928 break;
929 }
930 seq_putc(s, '\n');
931 goto unlock;
932 }
933 /*
934 * The above search should succeed. Otherwise return
935 * with an error.
936 */
937 ret = -ENOENT;
938 unlock:
939 mutex_unlock(&rdtgroup_mutex);
940
941 return ret;
942 }
943 #endif
944
945 static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
946 struct seq_file *seq, void *v)
947 {
948 int len;
949
950 mutex_lock(&rdtgroup_mutex);
951 len = seq_buf_used(&last_cmd_status);
952 if (len)
953 seq_printf(seq, "%.*s", len, last_cmd_status_buf);
954 else
955 seq_puts(seq, "ok\n");
956 mutex_unlock(&rdtgroup_mutex);
957 return 0;
958 }
959
960 static int rdt_num_closids_show(struct kernfs_open_file *of,
961 struct seq_file *seq, void *v)
962 {
963 struct resctrl_schema *s = of->kn->parent->priv;
964
965 seq_printf(seq, "%u\n", s->num_closid);
966 return 0;
967 }
968
969 static int rdt_default_ctrl_show(struct kernfs_open_file *of,
970 struct seq_file *seq, void *v)
971 {
972 struct resctrl_schema *s = of->kn->parent->priv;
973 struct rdt_resource *r = s->res;
974
975 seq_printf(seq, "%x\n", r->default_ctrl);
976 return 0;
977 }
978
979 static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
980 struct seq_file *seq, void *v)
981 {
982 struct resctrl_schema *s = of->kn->parent->priv;
983 struct rdt_resource *r = s->res;
984
985 seq_printf(seq, "%u\n", r->cache.min_cbm_bits);
986 return 0;
987 }
988
989 static int rdt_shareable_bits_show(struct kernfs_open_file *of,
990 struct seq_file *seq, void *v)
991 {
992 struct resctrl_schema *s = of->kn->parent->priv;
993 struct rdt_resource *r = s->res;
994
995 seq_printf(seq, "%x\n", r->cache.shareable_bits);
996 return 0;
997 }
998
999 /*
1000 * rdt_bit_usage_show - Display current usage of resources
1001 *
1002 * A domain is a shared resource that can now be allocated differently. Here
1003 * we display the current regions of the domain as an annotated bitmask.
1004 * For each domain of this resource its allocation bitmask
1005 * is annotated as below to indicate the current usage of the corresponding bit:
1006 * 0 - currently unused
1007 * X - currently available for sharing and used by software and hardware
1008 * H - currently used by hardware only but available for software use
1009 * S - currently used and shareable by software only
1010 * E - currently used exclusively by one resource group
1011 * P - currently pseudo-locked by one resource group
1012 */
1013 static int rdt_bit_usage_show(struct kernfs_open_file *of,
1014 struct seq_file *seq, void *v)
1015 {
1016 struct resctrl_schema *s = of->kn->parent->priv;
1017 /*
1018 * Use unsigned long even though only 32 bits are used to ensure
1019 * test_bit() is used safely.
1020 */
1021 unsigned long sw_shareable = 0, hw_shareable = 0;
1022 unsigned long exclusive = 0, pseudo_locked = 0;
1023 struct rdt_resource *r = s->res;
1024 struct rdt_ctrl_domain *dom;
1025 int i, hwb, swb, excl, psl;
1026 enum rdtgrp_mode mode;
1027 bool sep = false;
1028 u32 ctrl_val;
1029
1030 cpus_read_lock();
1031 mutex_lock(&rdtgroup_mutex);
1032 hw_shareable = r->cache.shareable_bits;
1033 list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
1034 if (sep)
1035 seq_putc(seq, ';');
1036 sw_shareable = 0;
1037 exclusive = 0;
1038 seq_printf(seq, "%d=", dom->hdr.id);
1039 for (i = 0; i < closids_supported(); i++) {
1040 if (!closid_allocated(i))
1041 continue;
1042 ctrl_val = resctrl_arch_get_config(r, dom, i,
1043 s->conf_type);
1044 mode = rdtgroup_mode_by_closid(i);
1045 switch (mode) {
1046 case RDT_MODE_SHAREABLE:
1047 sw_shareable |= ctrl_val;
1048 break;
1049 case RDT_MODE_EXCLUSIVE:
1050 exclusive |= ctrl_val;
1051 break;
1052 case RDT_MODE_PSEUDO_LOCKSETUP:
1053 /*
1054 * RDT_MODE_PSEUDO_LOCKSETUP is possible
1055 * here but not included since the CBM
1056 * associated with this CLOSID in this mode
1057 * is not initialized and no task or cpu can be
1058 * assigned this CLOSID.
1059 */
1060 break;
1061 case RDT_MODE_PSEUDO_LOCKED:
1062 case RDT_NUM_MODES:
1063 WARN(1,
1064 "invalid mode for closid %d\n", i);
1065 break;
1066 }
1067 }
1068 for (i = r->cache.cbm_len - 1; i >= 0; i--) {
1069 pseudo_locked = dom->plr ? dom->plr->cbm : 0;
1070 hwb = test_bit(i, &hw_shareable);
1071 swb = test_bit(i, &sw_shareable);
1072 excl = test_bit(i, &exclusive);
1073 psl = test_bit(i, &pseudo_locked);
1074 if (hwb && swb)
1075 seq_putc(seq, 'X');
1076 else if (hwb && !swb)
1077 seq_putc(seq, 'H');
1078 else if (!hwb && swb)
1079 seq_putc(seq, 'S');
1080 else if (excl)
1081 seq_putc(seq, 'E');
1082 else if (psl)
1083 seq_putc(seq, 'P');
1084 else /* Unused bits remain */
1085 seq_putc(seq, '0');
1086 }
1087 sep = true;
1088 }
1089 seq_putc(seq, '\n');
1090 mutex_unlock(&rdtgroup_mutex);
1091 cpus_read_unlock();
1092 return 0;
1093 }
1094
1095 static int rdt_min_bw_show(struct kernfs_open_file *of,
1096 struct seq_file *seq, void *v)
1097 {
1098 struct resctrl_schema *s = of->kn->parent->priv;
1099 struct rdt_resource *r = s->res;
1100
1101 seq_printf(seq, "%u\n", r->membw.min_bw);
1102 return 0;
1103 }
1104
1105 static int rdt_num_rmids_show(struct kernfs_open_file *of,
1106 struct seq_file *seq, void *v)
1107 {
1108 struct rdt_resource *r = of->kn->parent->priv;
1109
1110 seq_printf(seq, "%d\n", r->num_rmid);
1111
1112 return 0;
1113 }
1114
1115 static int rdt_mon_features_show(struct kernfs_open_file *of,
1116 struct seq_file *seq, void *v)
1117 {
1118 struct rdt_resource *r = of->kn->parent->priv;
1119 struct mon_evt *mevt;
1120
1121 list_for_each_entry(mevt, &r->evt_list, list) {
1122 seq_printf(seq, "%s\n", mevt->name);
1123 if (mevt->configurable)
1124 seq_printf(seq, "%s_config\n", mevt->name);
1125 }
1126
1127 return 0;
1128 }
1129
1130 static int rdt_bw_gran_show(struct kernfs_open_file *of,
1131 struct seq_file *seq, void *v)
1132 {
1133 struct resctrl_schema *s = of->kn->parent->priv;
1134 struct rdt_resource *r = s->res;
1135
1136 seq_printf(seq, "%u\n", r->membw.bw_gran);
1137 return 0;
1138 }
1139
1140 static int rdt_delay_linear_show(struct kernfs_open_file *of,
1141 struct seq_file *seq, void *v)
1142 {
1143 struct resctrl_schema *s = of->kn->parent->priv;
1144 struct rdt_resource *r = s->res;
1145
1146 seq_printf(seq, "%u\n", r->membw.delay_linear);
1147 return 0;
1148 }
1149
1150 static int max_threshold_occ_show(struct kernfs_open_file *of,
1151 struct seq_file *seq, void *v)
1152 {
1153 seq_printf(seq, "%u\n", resctrl_rmid_realloc_threshold);
1154
1155 return 0;
1156 }
1157
1158 static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
1159 struct seq_file *seq, void *v)
1160 {
1161 struct resctrl_schema *s = of->kn->parent->priv;
1162 struct rdt_resource *r = s->res;
1163
1164 if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD)
1165 seq_puts(seq, "per-thread\n");
1166 else
1167 seq_puts(seq, "max\n");
1168
1169 return 0;
1170 }
1171
1172 static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
1173 char *buf, size_t nbytes, loff_t off)
1174 {
1175 unsigned int bytes;
1176 int ret;
1177
1178 ret = kstrtouint(buf, 0, &bytes);
1179 if (ret)
1180 return ret;
1181
1182 if (bytes > resctrl_rmid_realloc_limit)
1183 return -EINVAL;
1184
1185 resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(bytes);
1186
1187 return nbytes;
1188 }
1189
1190 /*
1191 * rdtgroup_mode_show - Display mode of this resource group
1192 */
1193 static int rdtgroup_mode_show(struct kernfs_open_file *of,
1194 struct seq_file *s, void *v)
1195 {
1196 struct rdtgroup *rdtgrp;
1197
1198 rdtgrp = rdtgroup_kn_lock_live(of->kn);
1199 if (!rdtgrp) {
1200 rdtgroup_kn_unlock(of->kn);
1201 return -ENOENT;
1202 }
1203
1204 seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode));
1205
1206 rdtgroup_kn_unlock(of->kn);
1207 return 0;
1208 }
1209
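/* Return the CDP resource type that shares the hardware with @my_type. */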
1210 static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type)
1211 {
1212 switch (my_type) {
1213 case CDP_CODE:
1214 return CDP_DATA;
1215 case CDP_DATA:
1216 return CDP_CODE;
1217 default:
1218 case CDP_NONE:
1219 return CDP_NONE;
1220 }
1221 }
1222
1223 static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of,
1224 struct seq_file *seq, void *v)
1225 {
1226 struct resctrl_schema *s = of->kn->parent->priv;
1227 struct rdt_resource *r = s->res;
1228
1229 seq_printf(seq, "%u\n", r->cache.arch_has_sparse_bitmasks);
1230
1231 return 0;
1232 }
1233
1234 /**
1235 * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
1236 * @r: Resource to which domain instance @d belongs.
1237 * @d: The domain instance for which @closid is being tested.
1238 * @cbm: Capacity bitmask being tested.
1239 * @closid: Intended closid for @cbm.
1240 * @type: CDP type of @r.
1241 * @exclusive: Only check if overlaps with exclusive resource groups
1242 *
1243 * Checks if provided @cbm intended to be used for @closid on domain
1244 * @d overlaps with any other closids or other hardware usage associated
1245 * with this domain. If @exclusive is true then only overlaps with
1246 * resource groups in exclusive mode will be considered. If @exclusive
1247 * is false then overlaps with any resource group or hardware entities
1248 * will be considered.
1249 *
1250 * @cbm is unsigned long, even if only 32 bits are used, to make the
1251 * bitmap functions work correctly.
1252 *
1253 * Return: false if CBM does not overlap, true if it does.
1254 */
1255 static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_ctrl_domain *d,
1256 unsigned long cbm, int closid,
1257 enum resctrl_conf_type type, bool exclusive)
1258 {
1259 enum rdtgrp_mode mode;
1260 unsigned long ctrl_b;
1261 int i;
1262
1263 /* Check for any overlap with regions used by hardware directly */
1264 if (!exclusive) {
1265 ctrl_b = r->cache.shareable_bits;
1266 if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
1267 return true;
1268 }
1269
1270 /* Check for overlap with other resource groups */
1271 for (i = 0; i < closids_supported(); i++) {
1272 ctrl_b = resctrl_arch_get_config(r, d, i, type);
1273 mode = rdtgroup_mode_by_closid(i);
1274 if (closid_allocated(i) && i != closid &&
1275 mode != RDT_MODE_PSEUDO_LOCKSETUP) {
1276 if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
1277 if (exclusive) {
1278 if (mode == RDT_MODE_EXCLUSIVE)
1279 return true;
1280 continue;
1281 }
1282 return true;
1283 }
1284 }
1285 }
1286
1287 return false;
1288 }
1289
1290 /**
1291 * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
1292 * @s: Schema for the resource to which domain instance @d belongs.
1293 * @d: The domain instance for which @closid is being tested.
1294 * @cbm: Capacity bitmask being tested.
1295 * @closid: Intended closid for @cbm.
1296 * @exclusive: Only check if overlaps with exclusive resource groups
1297 *
1298 * Resources that can be allocated using a CBM can use the CBM to control
1299 * the overlap of these allocations. rdtgroup_cbm_overlaps() is the test
1300 * for overlap. Overlap test is not limited to the specific resource for
1301 * which the CBM is intended though - when dealing with CDP resources that
1302 * share the underlying hardware the overlap check should be performed on
1303 * the CDP resource sharing the hardware also.
1304 *
1305 * Refer to description of __rdtgroup_cbm_overlaps() for the details of the
1306 * overlap test.
1307 *
1308 * Return: true if CBM overlap detected, false if there is no overlap
1309 */
1310 bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d,
1311 unsigned long cbm, int closid, bool exclusive)
1312 {
1313 enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
1314 struct rdt_resource *r = s->res;
1315
1316 if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, s->conf_type,
1317 exclusive))
1318 return true;
1319
1320 if (!resctrl_arch_get_cdp_enabled(r->rid))
1321 return false;
1322 return __rdtgroup_cbm_overlaps(r, d, cbm, closid, peer_type, exclusive);
1323 }
1324
1325 /**
1326 * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
1327 * @rdtgrp: Resource group identified through its closid.
1328 *
1329 * An exclusive resource group implies that there should be no sharing of
1330 * its allocated resources. At the time this group is considered to be
1331 * exclusive this test can determine if its current schemata supports this
1332 * setting by testing for overlap with all other resource groups.
1333 *
1334 * Return: true if resource group can be exclusive, false if there is overlap
1335 * with allocations of other resource groups and thus this resource group
1336 * cannot be exclusive.
1337 */
1338 static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
1339 {
1340 int closid = rdtgrp->closid;
1341 struct rdt_ctrl_domain *d;
1342 struct resctrl_schema *s;
1343 struct rdt_resource *r;
1344 bool has_cache = false;
1345 u32 ctrl;
1346
1347 /* Walking r->domains, ensure it can't race with cpuhp */
1348 lockdep_assert_cpus_held();
1349
1350 list_for_each_entry(s, &resctrl_schema_all, list) {
1351 r = s->res;
1352 if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)
1353 continue;
1354 has_cache = true;
1355 list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
1356 ctrl = resctrl_arch_get_config(r, d, closid,
1357 s->conf_type);
1358 if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) {
1359 rdt_last_cmd_puts("Schemata overlaps\n");
1360 return false;
1361 }
1362 }
1363 }
1364
1365 if (!has_cache) {
1366 rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n");
1367 return false;
1368 }
1369
1370 return true;
1371 }
1372
1373 /*
1374 * rdtgroup_mode_write - Modify the resource group's mode
1375 */
1376 static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
1377 char *buf, size_t nbytes, loff_t off)
1378 {
1379 struct rdtgroup *rdtgrp;
1380 enum rdtgrp_mode mode;
1381 int ret = 0;
1382
1383 /* Valid input requires a trailing newline */
1384 if (nbytes == 0 || buf[nbytes - 1] != '\n')
1385 return -EINVAL;
1386 buf[nbytes - 1] = '\0';
1387
1388 rdtgrp = rdtgroup_kn_lock_live(of->kn);
1389 if (!rdtgrp) {
1390 rdtgroup_kn_unlock(of->kn);
1391 return -ENOENT;
1392 }
1393
1394 rdt_last_cmd_clear();
1395
1396 mode = rdtgrp->mode;
1397
1398 if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) ||
1399 (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) ||
1400 (!strcmp(buf, "pseudo-locksetup") &&
1401 mode == RDT_MODE_PSEUDO_LOCKSETUP) ||
1402 (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED))
1403 goto out;
1404
1405 if (mode == RDT_MODE_PSEUDO_LOCKED) {
1406 rdt_last_cmd_puts("Cannot change pseudo-locked group\n");
1407 ret = -EINVAL;
1408 goto out;
1409 }
1410
1411 if (!strcmp(buf, "shareable")) {
1412 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1413 ret = rdtgroup_locksetup_exit(rdtgrp);
1414 if (ret)
1415 goto out;
1416 }
1417 rdtgrp->mode = RDT_MODE_SHAREABLE;
1418 } else if (!strcmp(buf, "exclusive")) {
1419 if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
1420 ret = -EINVAL;
1421 goto out;
1422 }
1423 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1424 ret = rdtgroup_locksetup_exit(rdtgrp);
1425 if (ret)
1426 goto out;
1427 }
1428 rdtgrp->mode = RDT_MODE_EXCLUSIVE;
1429 } else if (!strcmp(buf, "pseudo-locksetup")) {
1430 ret = rdtgroup_locksetup_enter(rdtgrp);
1431 if (ret)
1432 goto out;
1433 rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP;
1434 } else {
1435 rdt_last_cmd_puts("Unknown or unsupported mode\n");
1436 ret = -EINVAL;
1437 }
1438
1439 out:
1440 rdtgroup_kn_unlock(of->kn);
1441 return ret ?: nbytes;
1442 }
1443
1444 /**
1445 * rdtgroup_cbm_to_size - Translate CBM to size in bytes
1446 * @r: RDT resource to which @d belongs.
1447 * @d: RDT domain instance.
1448 * @cbm: bitmask for which the size should be computed.
1449 *
1450 * The bitmask provided associated with the RDT domain instance @d will be
1451 * translated into how many bytes it represents. The size in bytes is
1452 * computed by first dividing the total cache size by the CBM length to
1453 * determine how many bytes each bit in the bitmask represents. The result
1454 * is multiplied with the number of bits set in the bitmask.
1455 *
1456 * @cbm is unsigned long, even if only 32 bits are used, to make the
1457 * bitmap functions work correctly.
1458 */
1459 unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
1460 struct rdt_ctrl_domain *d, unsigned long cbm)
1461 {
1462 unsigned int size = 0;
1463 struct cacheinfo *ci;
1464 int num_b;
1465
1466 if (WARN_ON_ONCE(r->ctrl_scope != RESCTRL_L2_CACHE && r->ctrl_scope != RESCTRL_L3_CACHE))
1467 return size;
1468
1469 num_b = bitmap_weight(&cbm, r->cache.cbm_len);
1470 ci = get_cpu_cacheinfo_level(cpumask_any(&d->hdr.cpu_mask), r->ctrl_scope);
1471 if (ci)
1472 size = ci->size / r->cache.cbm_len * num_b;
1473
1474 return size;
1475 }
1476
1477 /*
1478 * rdtgroup_size_show - Display size in bytes of allocated regions
1479 *
1480 * The "size" file mirrors the layout of the "schemata" file, printing the
1481 * size in bytes of each region instead of the capacity bitmask.
1482 */
1483 static int rdtgroup_size_show(struct kernfs_open_file *of,
1484 struct seq_file *s, void *v)
1485 {
1486 struct resctrl_schema *schema;
1487 enum resctrl_conf_type type;
1488 struct rdt_ctrl_domain *d;
1489 struct rdtgroup *rdtgrp;
1490 struct rdt_resource *r;
1491 unsigned int size;
1492 int ret = 0;
1493 u32 closid;
1494 bool sep;
1495 u32 ctrl;
1496
1497 rdtgrp = rdtgroup_kn_lock_live(of->kn);
1498 if (!rdtgrp) {
1499 rdtgroup_kn_unlock(of->kn);
1500 return -ENOENT;
1501 }
1502
1503 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
1504 if (!rdtgrp->plr->d) {
1505 rdt_last_cmd_clear();
1506 rdt_last_cmd_puts("Cache domain offline\n");
1507 ret = -ENODEV;
1508 } else {
1509 seq_printf(s, "%*s:", max_name_width,
1510 rdtgrp->plr->s->name);
1511 size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res,
1512 rdtgrp->plr->d,
1513 rdtgrp->plr->cbm);
1514 seq_printf(s, "%d=%u\n", rdtgrp->plr->d->hdr.id, size);
1515 }
1516 goto out;
1517 }
1518
1519 closid = rdtgrp->closid;
1520
1521 list_for_each_entry(schema, &resctrl_schema_all, list) {
1522 r = schema->res;
1523 type = schema->conf_type;
1524 sep = false;
1525 seq_printf(s, "%*s:", max_name_width, schema->name);
1526 list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
1527 if (sep)
1528 seq_putc(s, ';');
1529 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1530 size = 0;
1531 } else {
1532 if (is_mba_sc(r))
1533 ctrl = d->mbps_val[closid];
1534 else
1535 ctrl = resctrl_arch_get_config(r, d,
1536 closid,
1537 type);
1538 if (r->rid == RDT_RESOURCE_MBA ||
1539 r->rid == RDT_RESOURCE_SMBA)
1540 size = ctrl;
1541 else
1542 size = rdtgroup_cbm_to_size(r, d, ctrl);
1543 }
1544 seq_printf(s, "%d=%u", d->hdr.id, size);
1545 sep = true;
1546 }
1547 seq_putc(s, '\n');
1548 }
1549
1550 out:
1551 rdtgroup_kn_unlock(of->kn);
1552
1553 return ret;
1554 }
1555
1556 struct mon_config_info {
1557 u32 evtid;
1558 u32 mon_config;
1559 };
1560
1561 #define INVALID_CONFIG_INDEX UINT_MAX
1562
1563 /**
1564 * mon_event_config_index_get - get the hardware index for the
1565 * configurable event
1566 * @evtid: event id.
1567 *
1568 * Return: 0 for evtid == QOS_L3_MBM_TOTAL_EVENT_ID
1569 * 1 for evtid == QOS_L3_MBM_LOCAL_EVENT_ID
1570 * INVALID_CONFIG_INDEX for invalid evtid
1571 */
1572 static inline unsigned int mon_event_config_index_get(u32 evtid)
1573 {
1574 switch (evtid) {
1575 case QOS_L3_MBM_TOTAL_EVENT_ID:
1576 return 0;
1577 case QOS_L3_MBM_LOCAL_EVENT_ID:
1578 return 1;
1579 default:
1580 /* Should never reach here */
1581 return INVALID_CONFIG_INDEX;
1582 }
1583 }
1584
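/* Read the event configuration MSR for @info->evtid on the local CPU. */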
1585 static void mon_event_config_read(void *info)
1586 {
1587 struct mon_config_info *mon_info = info;
1588 unsigned int index;
1589 u64 msrval;
1590
1591 index = mon_event_config_index_get(mon_info->evtid);
1592 if (index == INVALID_CONFIG_INDEX) {
1593 pr_warn_once("Invalid event id %d\n", mon_info->evtid);
1594 return;
1595 }
1596 rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval);
1597
1598 /* Report only the valid event configuration bits */
1599 mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS;
1600 }
1601
1602 static void mondata_config_read(struct rdt_mon_domain *d, struct mon_config_info *mon_info)
1603 {
1604 smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_read, mon_info, 1);
1605 }
1606
1607 static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
1608 {
1609 struct mon_config_info mon_info;
1610 struct rdt_mon_domain *dom;
1611 bool sep = false;
1612
1613 cpus_read_lock();
1614 mutex_lock(&rdtgroup_mutex);
1615
1616 list_for_each_entry(dom, &r->mon_domains, hdr.list) {
1617 if (sep)
1618 seq_puts(s, ";");
1619
1620 memset(&mon_info, 0, sizeof(struct mon_config_info));
1621 mon_info.evtid = evtid;
1622 mondata_config_read(dom, &mon_info);
1623
1624 seq_printf(s, "%d=0x%02x", dom->hdr.id, mon_info.mon_config);
1625 sep = true;
1626 }
1627 seq_puts(s, "\n");
1628
1629 mutex_unlock(&rdtgroup_mutex);
1630 cpus_read_unlock();
1631
1632 return 0;
1633 }
1634
1635 static int mbm_total_bytes_config_show(struct kernfs_open_file *of,
1636 struct seq_file *seq, void *v)
1637 {
1638 struct rdt_resource *r = of->kn->parent->priv;
1639
1640 mbm_config_show(seq, r, QOS_L3_MBM_TOTAL_EVENT_ID);
1641
1642 return 0;
1643 }
1644
1645 static int mbm_local_bytes_config_show(struct kernfs_open_file *of,
1646 struct seq_file *seq, void *v)
1647 {
1648 struct rdt_resource *r = of->kn->parent->priv;
1649
1650 mbm_config_show(seq, r, QOS_L3_MBM_LOCAL_EVENT_ID);
1651
1652 return 0;
1653 }
1654
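/* Write the event configuration MSR for @info->evtid on the local CPU. */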
1655 static void mon_event_config_write(void *info)
1656 {
1657 struct mon_config_info *mon_info = info;
1658 unsigned int index;
1659
1660 index = mon_event_config_index_get(mon_info->evtid);
1661 if (index == INVALID_CONFIG_INDEX) {
1662 pr_warn_once("Invalid event id %d\n", mon_info->evtid);
1663 return;
1664 }
1665 wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0);
1666 }
1667
1668 static void mbm_config_write_domain(struct rdt_resource *r,
1669 struct rdt_mon_domain *d, u32 evtid, u32 val)
1670 {
1671 struct mon_config_info mon_info = {0};
1672
1673 /*
1674 * Read the current config value first. If both are the same then
1675 * no need to write it again.
1676 */
1677 mon_info.evtid = evtid;
1678 mondata_config_read(d, &mon_info);
1679 if (mon_info.mon_config == val)
1680 return;
1681
1682 mon_info.mon_config = val;
1683
1684 /*
1685 * Update MSR_IA32_EVT_CFG_BASE MSR on one of the CPUs in the
1686 * domain. The MSRs offset from MSR_IA32_EVT_CFG_BASE
1687 * are scoped at the domain level. Writing any of these MSRs
1688 * on one CPU is observed by all the CPUs in the domain.
1689 */
1690 smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_write,
1691 &mon_info, 1);
1692
1693 /*
1694 * When an Event Configuration is changed, the bandwidth counters
1695 * for all RMIDs and Events will be cleared by the hardware. The
1696 * hardware also sets MSR_IA32_QM_CTR.Unavailable (bit 62) for
1697 * every RMID on the next read to any event for every RMID.
1698 * Subsequent reads will have MSR_IA32_QM_CTR.Unavailable (bit 62)
1699 * cleared while it is tracked by the hardware. Clear the
1700 * mbm_local and mbm_total counts for all the RMIDs.
1701 */
1702 resctrl_arch_reset_rmid_all(r, d);
1703 }
1704
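/*
 * Parse an event configuration string of the form "<domain>=<value>[;...]"
 * and apply each value to the matching monitor domain.
 */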
1705 static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid)
1706 {
1707 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
1708 char *dom_str = NULL, *id_str;
1709 unsigned long dom_id, val;
1710 struct rdt_mon_domain *d;
1711
1712 /* Walking r->domains, ensure it can't race with cpuhp */
1713 lockdep_assert_cpus_held();
1714
1715 next:
1716 if (!tok || tok[0] == '\0')
1717 return 0;
1718
1719 /* Start processing the strings for each domain */
1720 dom_str = strim(strsep(&tok, ";"));
1721 id_str = strsep(&dom_str, "=");
1722
1723 if (!id_str || kstrtoul(id_str, 10, &dom_id)) {
1724 rdt_last_cmd_puts("Missing '=' or non-numeric domain id\n");
1725 return -EINVAL;
1726 }
1727
1728 if (!dom_str || kstrtoul(dom_str, 16, &val)) {
1729 rdt_last_cmd_puts("Non-numeric event configuration value\n");
1730 return -EINVAL;
1731 }
1732
1733 /* Value from user cannot be more than the supported set of events */
1734 if ((val & hw_res->mbm_cfg_mask) != val) {
1735 rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n",
1736 hw_res->mbm_cfg_mask);
1737 return -EINVAL;
1738 }
1739
1740 list_for_each_entry(d, &r->mon_domains, hdr.list) {
1741 if (d->hdr.id == dom_id) {
1742 mbm_config_write_domain(r, d, evtid, val);
1743 goto next;
1744 }
1745 }
1746
1747 return -EINVAL;
1748 }
1749
1750 static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of,
1751 char *buf, size_t nbytes,
1752 loff_t off)
1753 {
1754 struct rdt_resource *r = of->kn->parent->priv;
1755 int ret;
1756
1757 /* Valid input requires a trailing newline */
1758 if (nbytes == 0 || buf[nbytes - 1] != '\n')
1759 return -EINVAL;
1760
1761 cpus_read_lock();
1762 mutex_lock(&rdtgroup_mutex);
1763
1764 rdt_last_cmd_clear();
1765
1766 buf[nbytes - 1] = '\0';
1767
1768 ret = mon_config_write(r, buf, QOS_L3_MBM_TOTAL_EVENT_ID);
1769
1770 mutex_unlock(&rdtgroup_mutex);
1771 cpus_read_unlock();
1772
1773 return ret ?: nbytes;
1774 }
1775
1776 static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of,
1777 char *buf, size_t nbytes,
1778 loff_t off)
1779 {
1780 struct rdt_resource *r = of->kn->parent->priv;
1781 int ret;
1782
1783 /* Valid input requires a trailing newline */
1784 if (nbytes == 0 || buf[nbytes - 1] != '\n')
1785 return -EINVAL;
1786
1787 cpus_read_lock();
1788 mutex_lock(&rdtgroup_mutex);
1789
1790 rdt_last_cmd_clear();
1791
1792 buf[nbytes - 1] = '\0';
1793
1794 ret = mon_config_write(r, buf, QOS_L3_MBM_LOCAL_EVENT_ID);
1795
1796 mutex_unlock(&rdtgroup_mutex);
1797 cpus_read_unlock();
1798
1799 return ret ?: nbytes;
1800 }
1801
1802 /* rdtgroup information files for one cache resource. */
1803 static struct rftype res_common_files[] = {
1804 {
1805 .name = "last_cmd_status",
1806 .mode = 0444,
1807 .kf_ops = &rdtgroup_kf_single_ops,
1808 .seq_show = rdt_last_cmd_status_show,
1809 .fflags = RFTYPE_TOP_INFO,
1810 },
1811 {
1812 .name = "num_closids",
1813 .mode = 0444,
1814 .kf_ops = &rdtgroup_kf_single_ops,
1815 .seq_show = rdt_num_closids_show,
1816 .fflags = RFTYPE_CTRL_INFO,
1817 },
1818 {
1819 .name = "mon_features",
1820 .mode = 0444,
1821 .kf_ops = &rdtgroup_kf_single_ops,
1822 .seq_show = rdt_mon_features_show,
1823 .fflags = RFTYPE_MON_INFO,
1824 },
1825 {
1826 .name = "num_rmids",
1827 .mode = 0444,
1828 .kf_ops = &rdtgroup_kf_single_ops,
1829 .seq_show = rdt_num_rmids_show,
1830 .fflags = RFTYPE_MON_INFO,
1831 },
1832 {
1833 .name = "cbm_mask",
1834 .mode = 0444,
1835 .kf_ops = &rdtgroup_kf_single_ops,
1836 .seq_show = rdt_default_ctrl_show,
1837 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
1838 },
1839 {
1840 .name = "min_cbm_bits",
1841 .mode = 0444,
1842 .kf_ops = &rdtgroup_kf_single_ops,
1843 .seq_show = rdt_min_cbm_bits_show,
1844 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
1845 },
1846 {
1847 .name = "shareable_bits",
1848 .mode = 0444,
1849 .kf_ops = &rdtgroup_kf_single_ops,
1850 .seq_show = rdt_shareable_bits_show,
1851 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
1852 },
1853 {
1854 .name = "bit_usage",
1855 .mode = 0444,
1856 .kf_ops = &rdtgroup_kf_single_ops,
1857 .seq_show = rdt_bit_usage_show,
1858 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
1859 },
1860 {
1861 .name = "min_bandwidth",
1862 .mode = 0444,
1863 .kf_ops = &rdtgroup_kf_single_ops,
1864 .seq_show = rdt_min_bw_show,
1865 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
1866 },
1867 {
1868 .name = "bandwidth_gran",
1869 .mode = 0444,
1870 .kf_ops = &rdtgroup_kf_single_ops,
1871 .seq_show = rdt_bw_gran_show,
1872 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
1873 },
1874 {
1875 .name = "delay_linear",
1876 .mode = 0444,
1877 .kf_ops = &rdtgroup_kf_single_ops,
1878 .seq_show = rdt_delay_linear_show,
1879 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
1880 },
1881 /*
1882 * It is platform specific which (if any) capabilities are provided by
1883 * thread_throttle_mode. Defer "fflags" initialization until platform
1884 * discovery.
1885 */
1886 {
1887 .name = "thread_throttle_mode",
1888 .mode = 0444,
1889 .kf_ops = &rdtgroup_kf_single_ops,
1890 .seq_show = rdt_thread_throttle_mode_show,
1891 },
1892 {
1893 .name = "max_threshold_occupancy",
1894 .mode = 0644,
1895 .kf_ops = &rdtgroup_kf_single_ops,
1896 .write = max_threshold_occ_write,
1897 .seq_show = max_threshold_occ_show,
1898 .fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE,
1899 },
1900 {
1901 .name = "mbm_total_bytes_config",
1902 .mode = 0644,
1903 .kf_ops = &rdtgroup_kf_single_ops,
1904 .seq_show = mbm_total_bytes_config_show,
1905 .write = mbm_total_bytes_config_write,
1906 },
1907 {
1908 .name = "mbm_local_bytes_config",
1909 .mode = 0644,
1910 .kf_ops = &rdtgroup_kf_single_ops,
1911 .seq_show = mbm_local_bytes_config_show,
1912 .write = mbm_local_bytes_config_write,
1913 },
1914 {
1915 .name = "cpus",
1916 .mode = 0644,
1917 .kf_ops = &rdtgroup_kf_single_ops,
1918 .write = rdtgroup_cpus_write,
1919 .seq_show = rdtgroup_cpus_show,
1920 .fflags = RFTYPE_BASE,
1921 },
1922 {
1923 .name = "cpus_list",
1924 .mode = 0644,
1925 .kf_ops = &rdtgroup_kf_single_ops,
1926 .write = rdtgroup_cpus_write,
1927 .seq_show = rdtgroup_cpus_show,
1928 .flags = RFTYPE_FLAGS_CPUS_LIST,
1929 .fflags = RFTYPE_BASE,
1930 },
1931 {
1932 .name = "tasks",
1933 .mode = 0644,
1934 .kf_ops = &rdtgroup_kf_single_ops,
1935 .write = rdtgroup_tasks_write,
1936 .seq_show = rdtgroup_tasks_show,
1937 .fflags = RFTYPE_BASE,
1938 },
1939 {
1940 .name = "mon_hw_id",
1941 .mode = 0444,
1942 .kf_ops = &rdtgroup_kf_single_ops,
1943 .seq_show = rdtgroup_rmid_show,
1944 .fflags = RFTYPE_MON_BASE | RFTYPE_DEBUG,
1945 },
1946 {
1947 .name = "schemata",
1948 .mode = 0644,
1949 .kf_ops = &rdtgroup_kf_single_ops,
1950 .write = rdtgroup_schemata_write,
1951 .seq_show = rdtgroup_schemata_show,
1952 .fflags = RFTYPE_CTRL_BASE,
1953 },
1954 {
1955 .name = "mba_MBps_event",
1956 .mode = 0644,
1957 .kf_ops = &rdtgroup_kf_single_ops,
1958 .write = rdtgroup_mba_mbps_event_write,
1959 .seq_show = rdtgroup_mba_mbps_event_show,
1960 },
1961 {
1962 .name = "mode",
1963 .mode = 0644,
1964 .kf_ops = &rdtgroup_kf_single_ops,
1965 .write = rdtgroup_mode_write,
1966 .seq_show = rdtgroup_mode_show,
1967 .fflags = RFTYPE_CTRL_BASE,
1968 },
1969 {
1970 .name = "size",
1971 .mode = 0444,
1972 .kf_ops = &rdtgroup_kf_single_ops,
1973 .seq_show = rdtgroup_size_show,
1974 .fflags = RFTYPE_CTRL_BASE,
1975 },
1976 {
1977 .name = "sparse_masks",
1978 .mode = 0444,
1979 .kf_ops = &rdtgroup_kf_single_ops,
1980 .seq_show = rdt_has_sparse_bitmasks_show,
1981 .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
1982 },
1983 {
1984 .name = "ctrl_hw_id",
1985 .mode = 0444,
1986 .kf_ops = &rdtgroup_kf_single_ops,
1987 .seq_show = rdtgroup_closid_show,
1988 .fflags = RFTYPE_CTRL_BASE | RFTYPE_DEBUG,
1989 },
1990
1991 };
1992
1993 static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
1994 {
1995 struct rftype *rfts, *rft;
1996 int ret, len;
1997
1998 rfts = res_common_files;
1999 len = ARRAY_SIZE(res_common_files);
2000
2001 lockdep_assert_held(&rdtgroup_mutex);
2002
2003 if (resctrl_debug)
2004 fflags |= RFTYPE_DEBUG;
2005
2006 for (rft = rfts; rft < rfts + len; rft++) {
2007 if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) {
2008 ret = rdtgroup_add_file(kn, rft);
2009 if (ret)
2010 goto error;
2011 }
2012 }
2013
2014 return 0;
2015 error:
2016 pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
2017 while (--rft >= rfts) {
2018 if ((fflags & rft->fflags) == rft->fflags)
2019 kernfs_remove_by_name(kn, rft->name);
2020 }
2021 return ret;
2022 }
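/*
 * Example of the fflags matching above (illustrative only): "cbm_mask" is
 * tagged RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE in res_common_files[], so it
 * is only created when the caller passes at least both of those bits,
 * i.e. for the info directory of a cache resource, and is skipped for
 * memory bandwidth resources and for resource group directories.
 */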
2023
2024 static struct rftype *rdtgroup_get_rftype_by_name(const char *name)
2025 {
2026 struct rftype *rfts, *rft;
2027 int len;
2028
2029 rfts = res_common_files;
2030 len = ARRAY_SIZE(res_common_files);
2031
2032 for (rft = rfts; rft < rfts + len; rft++) {
2033 if (!strcmp(rft->name, name))
2034 return rft;
2035 }
2036
2037 return NULL;
2038 }
2039
2040 void resctrl_file_fflags_init(const char *config, unsigned long fflags)
2041 {
2042 struct rftype *rft;
2043
2044 rft = rdtgroup_get_rftype_by_name(config);
2045 if (rft)
2046 rft->fflags = fflags;
2047 }
2048
2049 /**
2050 * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
2051 * @r: The resource group with which the file is associated.
2052 * @name: Name of the file
2053 *
2054 * The permissions of the named resctrl file, directory, or link are modified
2055 * to not allow read, write, or execute by any user.
2056 *
2057 * WARNING: This function is intended to communicate to the user that the
2058 * resctrl file has been locked down - that it is not relevant to the
2059 * particular state the system finds itself in. It should not be relied
2060 * on to protect from user access because after the file's permissions
2061 * are restricted the user can still change the permissions using chmod
2062 * from the command line.
2063 *
2064 * Return: 0 on success, <0 on failure.
2065 */
2066 int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name)
2067 {
2068 struct iattr iattr = {.ia_valid = ATTR_MODE,};
2069 struct kernfs_node *kn;
2070 int ret = 0;
2071
2072 kn = kernfs_find_and_get_ns(r->kn, name, NULL);
2073 if (!kn)
2074 return -ENOENT;
2075
2076 switch (kernfs_type(kn)) {
2077 case KERNFS_DIR:
2078 iattr.ia_mode = S_IFDIR;
2079 break;
2080 case KERNFS_FILE:
2081 iattr.ia_mode = S_IFREG;
2082 break;
2083 case KERNFS_LINK:
2084 iattr.ia_mode = S_IFLNK;
2085 break;
2086 }
2087
2088 ret = kernfs_setattr(kn, &iattr);
2089 kernfs_put(kn);
2090 return ret;
2091 }
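/*
 * Illustrative caller pattern (a sketch, not code from this file): a mode
 * change that makes a file irrelevant can pair the restrict/restore
 * helpers, e.g.
 *
 *	ret = rdtgroup_kn_mode_restrict(rdtgrp, "tasks");
 *	...
 *	ret = rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777);
 *
 * where the 0777 mask lets the full mode recorded in res_common_files[]
 * be reinstated by rdtgroup_kn_mode_restore() below.
 */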
2092
2093 /**
2094 * rdtgroup_kn_mode_restore - Restore user access to named resctrl file
2095 * @r: The resource group with which the file is associated.
2096 * @name: Name of the file
2097 * @mask: Mask of permissions that should be restored
2098 *
2099 * Restore the permissions of the named file. If @name is a directory the
2100 * permissions of its parent will be used.
2101 *
2102 * Return: 0 on success, <0 on failure.
2103 */
2104 int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
2105 umode_t mask)
2106 {
2107 struct iattr iattr = {.ia_valid = ATTR_MODE,};
2108 struct kernfs_node *kn, *parent;
2109 struct rftype *rfts, *rft;
2110 int ret, len;
2111
2112 rfts = res_common_files;
2113 len = ARRAY_SIZE(res_common_files);
2114
2115 for (rft = rfts; rft < rfts + len; rft++) {
2116 if (!strcmp(rft->name, name))
2117 iattr.ia_mode = rft->mode & mask;
2118 }
2119
2120 kn = kernfs_find_and_get_ns(r->kn, name, NULL);
2121 if (!kn)
2122 return -ENOENT;
2123
2124 switch (kernfs_type(kn)) {
2125 case KERNFS_DIR:
2126 parent = kernfs_get_parent(kn);
2127 if (parent) {
2128 iattr.ia_mode |= parent->mode;
2129 kernfs_put(parent);
2130 }
2131 iattr.ia_mode |= S_IFDIR;
2132 break;
2133 case KERNFS_FILE:
2134 iattr.ia_mode |= S_IFREG;
2135 break;
2136 case KERNFS_LINK:
2137 iattr.ia_mode |= S_IFLNK;
2138 break;
2139 }
2140
2141 ret = kernfs_setattr(kn, &iattr);
2142 kernfs_put(kn);
2143 return ret;
2144 }
2145
2146 static int rdtgroup_mkdir_info_resdir(void *priv, char *name,
2147 unsigned long fflags)
2148 {
2149 struct kernfs_node *kn_subdir;
2150 int ret;
2151
2152 kn_subdir = kernfs_create_dir(kn_info, name,
2153 kn_info->mode, priv);
2154 if (IS_ERR(kn_subdir))
2155 return PTR_ERR(kn_subdir);
2156
2157 ret = rdtgroup_kn_set_ugid(kn_subdir);
2158 if (ret)
2159 return ret;
2160
2161 ret = rdtgroup_add_files(kn_subdir, fflags);
2162 if (!ret)
2163 kernfs_activate(kn_subdir);
2164
2165 return ret;
2166 }
2167
2168 static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
2169 {
2170 struct resctrl_schema *s;
2171 struct rdt_resource *r;
2172 unsigned long fflags;
2173 char name[32];
2174 int ret;
2175
2176 /* create the directory */
2177 kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
2178 if (IS_ERR(kn_info))
2179 return PTR_ERR(kn_info);
2180
2181 ret = rdtgroup_add_files(kn_info, RFTYPE_TOP_INFO);
2182 if (ret)
2183 goto out_destroy;
2184
2185 /* loop over enabled controls, these are all alloc_capable */
2186 list_for_each_entry(s, &resctrl_schema_all, list) {
2187 r = s->res;
2188 fflags = r->fflags | RFTYPE_CTRL_INFO;
2189 ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags);
2190 if (ret)
2191 goto out_destroy;
2192 }
2193
2194 for_each_mon_capable_rdt_resource(r) {
2195 fflags = r->fflags | RFTYPE_MON_INFO;
2196 sprintf(name, "%s_MON", r->name);
2197 ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
2198 if (ret)
2199 goto out_destroy;
2200 }
2201
2202 ret = rdtgroup_kn_set_ugid(kn_info);
2203 if (ret)
2204 goto out_destroy;
2205
2206 kernfs_activate(kn_info);
2207
2208 return 0;
2209
2210 out_destroy:
2211 kernfs_remove(kn_info);
2212 return ret;
2213 }
2214
2215 static int
2216 mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
2217 char *name, struct kernfs_node **dest_kn)
2218 {
2219 struct kernfs_node *kn;
2220 int ret;
2221
2222 /* create the directory */
2223 kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
2224 if (IS_ERR(kn))
2225 return PTR_ERR(kn);
2226
2227 if (dest_kn)
2228 *dest_kn = kn;
2229
2230 ret = rdtgroup_kn_set_ugid(kn);
2231 if (ret)
2232 goto out_destroy;
2233
2234 kernfs_activate(kn);
2235
2236 return 0;
2237
2238 out_destroy:
2239 kernfs_remove(kn);
2240 return ret;
2241 }
2242
2243 static void l3_qos_cfg_update(void *arg)
2244 {
2245 bool *enable = arg;
2246
2247 wrmsrl(MSR_IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
2248 }
2249
2250 static void l2_qos_cfg_update(void *arg)
2251 {
2252 bool *enable = arg;
2253
2254 wrmsrl(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
2255 }
2256
2257 static inline bool is_mba_linear(void)
2258 {
2259 return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.delay_linear;
2260 }
2261
2262 static int set_cache_qos_cfg(int level, bool enable)
2263 {
2264 void (*update)(void *arg);
2265 struct rdt_ctrl_domain *d;
2266 struct rdt_resource *r_l;
2267 cpumask_var_t cpu_mask;
2268 int cpu;
2269
2270 /* Walking r->domains, ensure it can't race with cpuhp */
2271 lockdep_assert_cpus_held();
2272
2273 if (level == RDT_RESOURCE_L3)
2274 update = l3_qos_cfg_update;
2275 else if (level == RDT_RESOURCE_L2)
2276 update = l2_qos_cfg_update;
2277 else
2278 return -EINVAL;
2279
2280 if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
2281 return -ENOMEM;
2282
2283 r_l = &rdt_resources_all[level].r_resctrl;
2284 list_for_each_entry(d, &r_l->ctrl_domains, hdr.list) {
2285 if (r_l->cache.arch_has_per_cpu_cfg)
2286 /* Pick all the CPUs in the domain instance */
2287 for_each_cpu(cpu, &d->hdr.cpu_mask)
2288 cpumask_set_cpu(cpu, cpu_mask);
2289 else
2290 /* Pick one CPU from each domain instance to update MSR */
2291 cpumask_set_cpu(cpumask_any(&d->hdr.cpu_mask), cpu_mask);
2292 }
2293
2294 /* Update QOS_CFG MSR on all the CPUs in cpu_mask */
2295 on_each_cpu_mask(cpu_mask, update, &enable, 1);
2296
2297 free_cpumask_var(cpu_mask);
2298
2299 return 0;
2300 }
2301
2302 /* Restore the qos cfg state when a domain comes online */
2303 void rdt_domain_reconfigure_cdp(struct rdt_resource *r)
2304 {
2305 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
2306
2307 if (!r->cdp_capable)
2308 return;
2309
2310 if (r->rid == RDT_RESOURCE_L2)
2311 l2_qos_cfg_update(&hw_res->cdp_enabled);
2312
2313 if (r->rid == RDT_RESOURCE_L3)
2314 l3_qos_cfg_update(&hw_res->cdp_enabled);
2315 }
2316
2317 static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_ctrl_domain *d)
2318 {
2319 u32 num_closid = resctrl_arch_get_num_closid(r);
2320 int cpu = cpumask_any(&d->hdr.cpu_mask);
2321 int i;
2322
2323 d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val),
2324 GFP_KERNEL, cpu_to_node(cpu));
2325 if (!d->mbps_val)
2326 return -ENOMEM;
2327
2328 for (i = 0; i < num_closid; i++)
2329 d->mbps_val[i] = MBA_MAX_MBPS;
2330
2331 return 0;
2332 }
2333
2334 static void mba_sc_domain_destroy(struct rdt_resource *r,
2335 struct rdt_ctrl_domain *d)
2336 {
2337 kfree(d->mbps_val);
2338 d->mbps_val = NULL;
2339 }
2340
2341 /*
2342 * MBA software controller is supported only if
2343 * MBM is supported and MBA is in linear scale,
2344 * and the MBM monitor scope is the same as MBA
2345 * control scope.
2346 */
2347 static bool supports_mba_mbps(void)
2348 {
2349 struct rdt_resource *rmbm = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
2350 struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
2351
2352 return (is_mbm_enabled() &&
2353 r->alloc_capable && is_mba_linear() &&
2354 r->ctrl_scope == rmbm->mon_scope);
2355 }
2356
2357 /*
2358 * Enable or disable the MBA software controller
2359 * which helps user specify bandwidth in MBps.
2360 */
2361 static int set_mba_sc(bool mba_sc)
2362 {
2363 struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
2364 u32 num_closid = resctrl_arch_get_num_closid(r);
2365 struct rdt_ctrl_domain *d;
2366 unsigned long fflags;
2367 int i;
2368
2369 if (!supports_mba_mbps() || mba_sc == is_mba_sc(r))
2370 return -EINVAL;
2371
2372 r->membw.mba_sc = mba_sc;
2373
2374 rdtgroup_default.mba_mbps_event = mba_mbps_default_event;
2375
2376 list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
2377 for (i = 0; i < num_closid; i++)
2378 d->mbps_val[i] = MBA_MAX_MBPS;
2379 }
2380
2381 fflags = mba_sc ? RFTYPE_CTRL_BASE | RFTYPE_MON_BASE : 0;
2382 resctrl_file_fflags_init("mba_MBps_event", fflags);
2383
2384 return 0;
2385 }
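/*
 * Example (assumed usage, not code from this file): the software
 * controller is selected at mount time, e.g.
 *
 *	# mount -t resctrl -o mba_MBps resctrl /sys/fs/resctrl
 *
 * after which MB: values written to schemata are treated as MBps targets
 * tracked in d->mbps_val[] instead of raw hardware throttling values.
 */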
2386
2387 static int cdp_enable(int level)
2388 {
2389 struct rdt_resource *r_l = &rdt_resources_all[level].r_resctrl;
2390 int ret;
2391
2392 if (!r_l->alloc_capable)
2393 return -EINVAL;
2394
2395 ret = set_cache_qos_cfg(level, true);
2396 if (!ret)
2397 rdt_resources_all[level].cdp_enabled = true;
2398
2399 return ret;
2400 }
2401
2402 static void cdp_disable(int level)
2403 {
2404 struct rdt_hw_resource *r_hw = &rdt_resources_all[level];
2405
2406 if (r_hw->cdp_enabled) {
2407 set_cache_qos_cfg(level, false);
2408 r_hw->cdp_enabled = false;
2409 }
2410 }
2411
2412 int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable)
2413 {
2414 struct rdt_hw_resource *hw_res = &rdt_resources_all[l];
2415
2416 if (!hw_res->r_resctrl.cdp_capable)
2417 return -EINVAL;
2418
2419 if (enable)
2420 return cdp_enable(l);
2421
2422 cdp_disable(l);
2423
2424 return 0;
2425 }
2426
2427 /*
2428 * We don't allow rdtgroup directories to be created anywhere
2429 * except the root directory. Thus when looking for the rdtgroup
2430 * structure for a kernfs node we are either looking at a directory,
2431 * in which case the rdtgroup structure is pointed at by the "priv"
2432 * field, or at a file, in which case we need only look to the parent
2433 * directory to find the rdtgroup.
2434 */
2435 static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
2436 {
2437 if (kernfs_type(kn) == KERNFS_DIR) {
2438 /*
2439 * All the resource directories use "kn->priv"
2440 * to point to the "struct rdtgroup" for the
2441 * resource. "info" and its subdirectories don't
2442 * have rdtgroup structures, so return NULL here.
2443 */
2444 if (kn == kn_info || kn->parent == kn_info)
2445 return NULL;
2446 else
2447 return kn->priv;
2448 } else {
2449 return kn->parent->priv;
2450 }
2451 }
2452
2453 static void rdtgroup_kn_get(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
2454 {
2455 atomic_inc(&rdtgrp->waitcount);
2456 kernfs_break_active_protection(kn);
2457 }
2458
2459 static void rdtgroup_kn_put(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
2460 {
2461 if (atomic_dec_and_test(&rdtgrp->waitcount) &&
2462 (rdtgrp->flags & RDT_DELETED)) {
2463 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2464 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
2465 rdtgroup_pseudo_lock_remove(rdtgrp);
2466 kernfs_unbreak_active_protection(kn);
2467 rdtgroup_remove(rdtgrp);
2468 } else {
2469 kernfs_unbreak_active_protection(kn);
2470 }
2471 }
2472
2473 struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
2474 {
2475 struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
2476
2477 if (!rdtgrp)
2478 return NULL;
2479
2480 rdtgroup_kn_get(rdtgrp, kn);
2481
2482 cpus_read_lock();
2483 mutex_lock(&rdtgroup_mutex);
2484
2485 /* Was this group deleted while we waited? */
2486 if (rdtgrp->flags & RDT_DELETED)
2487 return NULL;
2488
2489 return rdtgrp;
2490 }
2491
2492 void rdtgroup_kn_unlock(struct kernfs_node *kn)
2493 {
2494 struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
2495
2496 if (!rdtgrp)
2497 return;
2498
2499 mutex_unlock(&rdtgroup_mutex);
2500 cpus_read_unlock();
2501
2502 rdtgroup_kn_put(rdtgrp, kn);
2503 }
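/*
 * Typical usage of the two helpers above (an illustrative sketch):
 *
 *	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 *	if (rdtgrp) {
 *		... operate on rdtgrp ...
 *	}
 *	rdtgroup_kn_unlock(of->kn);
 *
 * rdtgroup_kn_unlock() is called even when rdtgroup_kn_lock_live()
 * returned NULL for a deleted group, since the waitcount taken in
 * rdtgroup_kn_get() and the locks must still be dropped in that case.
 */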
2504
2505 static int mkdir_mondata_all(struct kernfs_node *parent_kn,
2506 struct rdtgroup *prgrp,
2507 struct kernfs_node **mon_data_kn);
2508
2509 static void rdt_disable_ctx(void)
2510 {
2511 resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
2512 resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
2513 set_mba_sc(false);
2514
2515 resctrl_debug = false;
2516 }
2517
2518 static int rdt_enable_ctx(struct rdt_fs_context *ctx)
2519 {
2520 int ret = 0;
2521
2522 if (ctx->enable_cdpl2) {
2523 ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true);
2524 if (ret)
2525 goto out_done;
2526 }
2527
2528 if (ctx->enable_cdpl3) {
2529 ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true);
2530 if (ret)
2531 goto out_cdpl2;
2532 }
2533
2534 if (ctx->enable_mba_mbps) {
2535 ret = set_mba_sc(true);
2536 if (ret)
2537 goto out_cdpl3;
2538 }
2539
2540 if (ctx->enable_debug)
2541 resctrl_debug = true;
2542
2543 return 0;
2544
2545 out_cdpl3:
2546 resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
2547 out_cdpl2:
2548 resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
2549 out_done:
2550 return ret;
2551 }
2552
2553 static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type)
2554 {
2555 struct resctrl_schema *s;
2556 const char *suffix = "";
2557 int ret, cl;
2558
2559 s = kzalloc(sizeof(*s), GFP_KERNEL);
2560 if (!s)
2561 return -ENOMEM;
2562
2563 s->res = r;
2564 s->num_closid = resctrl_arch_get_num_closid(r);
2565 if (resctrl_arch_get_cdp_enabled(r->rid))
2566 s->num_closid /= 2;
2567
2568 s->conf_type = type;
2569 switch (type) {
2570 case CDP_CODE:
2571 suffix = "CODE";
2572 break;
2573 case CDP_DATA:
2574 suffix = "DATA";
2575 break;
2576 case CDP_NONE:
2577 suffix = "";
2578 break;
2579 }
2580
2581 ret = snprintf(s->name, sizeof(s->name), "%s%s", r->name, suffix);
2582 if (ret >= sizeof(s->name)) {
2583 kfree(s);
2584 return -EINVAL;
2585 }
2586
2587 cl = strlen(s->name);
2588
2589 /*
2590 * If CDP is supported by this resource, but not enabled, reserve
2591 * name width for the "CODE"/"DATA" suffix anyway so the tabular
2592 * format of the schemata file does not change between mounts.
2593 */
2594 if (r->cdp_capable && !resctrl_arch_get_cdp_enabled(r->rid))
2595 cl += 4;
2596
2597 if (cl > max_name_width)
2598 max_name_width = cl;
2599
2600 INIT_LIST_HEAD(&s->list);
2601 list_add(&s->list, &resctrl_schema_all);
2602
2603 return 0;
2604 }
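/*
 * Example of the resulting schema names (illustrative values): with CDP
 * enabled on L3 the schemata file carries one line per conf_type, e.g.
 *
 *	L3CODE:0=ffff;1=ffff
 *	L3DATA:0=ffff;1=ffff
 *
 * while with CDP disabled a single "L3:" line is used. max_name_width
 * accounts for the suffix even when CDP is off so the column layout does
 * not shift between mounts.
 */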
2605
2606 static int schemata_list_create(void)
2607 {
2608 struct rdt_resource *r;
2609 int ret = 0;
2610
2611 for_each_alloc_capable_rdt_resource(r) {
2612 if (resctrl_arch_get_cdp_enabled(r->rid)) {
2613 ret = schemata_list_add(r, CDP_CODE);
2614 if (ret)
2615 break;
2616
2617 ret = schemata_list_add(r, CDP_DATA);
2618 } else {
2619 ret = schemata_list_add(r, CDP_NONE);
2620 }
2621
2622 if (ret)
2623 break;
2624 }
2625
2626 return ret;
2627 }
2628
2629 static void schemata_list_destroy(void)
2630 {
2631 struct resctrl_schema *s, *tmp;
2632
2633 list_for_each_entry_safe(s, tmp, &resctrl_schema_all, list) {
2634 list_del(&s->list);
2635 kfree(s);
2636 }
2637 }
2638
2639 static int rdt_get_tree(struct fs_context *fc)
2640 {
2641 struct rdt_fs_context *ctx = rdt_fc2context(fc);
2642 unsigned long flags = RFTYPE_CTRL_BASE;
2643 struct rdt_mon_domain *dom;
2644 struct rdt_resource *r;
2645 int ret;
2646
2647 cpus_read_lock();
2648 mutex_lock(&rdtgroup_mutex);
2649 /*
2650 * resctrl file system can only be mounted once.
2651 */
2652 if (resctrl_mounted) {
2653 ret = -EBUSY;
2654 goto out;
2655 }
2656
2657 ret = rdtgroup_setup_root(ctx);
2658 if (ret)
2659 goto out;
2660
2661 ret = rdt_enable_ctx(ctx);
2662 if (ret)
2663 goto out_root;
2664
2665 ret = schemata_list_create();
2666 if (ret) {
2667 schemata_list_destroy();
2668 goto out_ctx;
2669 }
2670
2671 closid_init();
2672
2673 if (resctrl_arch_mon_capable())
2674 flags |= RFTYPE_MON;
2675
2676 ret = rdtgroup_add_files(rdtgroup_default.kn, flags);
2677 if (ret)
2678 goto out_schemata_free;
2679
2680 kernfs_activate(rdtgroup_default.kn);
2681
2682 ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
2683 if (ret < 0)
2684 goto out_schemata_free;
2685
2686 if (resctrl_arch_mon_capable()) {
2687 ret = mongroup_create_dir(rdtgroup_default.kn,
2688 &rdtgroup_default, "mon_groups",
2689 &kn_mongrp);
2690 if (ret < 0)
2691 goto out_info;
2692
2693 ret = mkdir_mondata_all(rdtgroup_default.kn,
2694 &rdtgroup_default, &kn_mondata);
2695 if (ret < 0)
2696 goto out_mongrp;
2697 rdtgroup_default.mon.mon_data_kn = kn_mondata;
2698 }
2699
2700 ret = rdt_pseudo_lock_init();
2701 if (ret)
2702 goto out_mondata;
2703
2704 ret = kernfs_get_tree(fc);
2705 if (ret < 0)
2706 goto out_psl;
2707
2708 if (resctrl_arch_alloc_capable())
2709 resctrl_arch_enable_alloc();
2710 if (resctrl_arch_mon_capable())
2711 resctrl_arch_enable_mon();
2712
2713 if (resctrl_arch_alloc_capable() || resctrl_arch_mon_capable())
2714 resctrl_mounted = true;
2715
2716 if (is_mbm_enabled()) {
2717 r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
2718 list_for_each_entry(dom, &r->mon_domains, hdr.list)
2719 mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
2720 RESCTRL_PICK_ANY_CPU);
2721 }
2722
2723 goto out;
2724
2725 out_psl:
2726 rdt_pseudo_lock_release();
2727 out_mondata:
2728 if (resctrl_arch_mon_capable())
2729 kernfs_remove(kn_mondata);
2730 out_mongrp:
2731 if (resctrl_arch_mon_capable())
2732 kernfs_remove(kn_mongrp);
2733 out_info:
2734 kernfs_remove(kn_info);
2735 out_schemata_free:
2736 schemata_list_destroy();
2737 out_ctx:
2738 rdt_disable_ctx();
2739 out_root:
2740 rdtgroup_destroy_root();
2741 out:
2742 rdt_last_cmd_clear();
2743 mutex_unlock(&rdtgroup_mutex);
2744 cpus_read_unlock();
2745 return ret;
2746 }
2747
2748 enum rdt_param {
2749 Opt_cdp,
2750 Opt_cdpl2,
2751 Opt_mba_mbps,
2752 Opt_debug,
2753 nr__rdt_params
2754 };
2755
2756 static const struct fs_parameter_spec rdt_fs_parameters[] = {
2757 fsparam_flag("cdp", Opt_cdp),
2758 fsparam_flag("cdpl2", Opt_cdpl2),
2759 fsparam_flag("mba_MBps", Opt_mba_mbps),
2760 fsparam_flag("debug", Opt_debug),
2761 {}
2762 };
2763
2764 static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
2765 {
2766 struct rdt_fs_context *ctx = rdt_fc2context(fc);
2767 struct fs_parse_result result;
2768 const char *msg;
2769 int opt;
2770
2771 opt = fs_parse(fc, rdt_fs_parameters, param, &result);
2772 if (opt < 0)
2773 return opt;
2774
2775 switch (opt) {
2776 case Opt_cdp:
2777 ctx->enable_cdpl3 = true;
2778 return 0;
2779 case Opt_cdpl2:
2780 ctx->enable_cdpl2 = true;
2781 return 0;
2782 case Opt_mba_mbps:
2783 msg = "mba_MBps requires MBM and linear scale MBA at L3 scope";
2784 if (!supports_mba_mbps())
2785 return invalfc(fc, msg);
2786 ctx->enable_mba_mbps = true;
2787 return 0;
2788 case Opt_debug:
2789 ctx->enable_debug = true;
2790 return 0;
2791 }
2792
2793 return -EINVAL;
2794 }
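/*
 * Mount option examples matching rdt_fs_parameters[] (illustrative):
 *
 *	# mount -t resctrl resctrl /sys/fs/resctrl
 *	# mount -t resctrl -o cdp,cdpl2 resctrl /sys/fs/resctrl
 *	# mount -t resctrl -o mba_MBps,debug resctrl /sys/fs/resctrl
 *
 * Each flag only sets the corresponding enable_* bit in the
 * rdt_fs_context; the work happens later in rdt_enable_ctx().
 */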
2795
2796 static void rdt_fs_context_free(struct fs_context *fc)
2797 {
2798 struct rdt_fs_context *ctx = rdt_fc2context(fc);
2799
2800 kernfs_free_fs_context(fc);
2801 kfree(ctx);
2802 }
2803
2804 static const struct fs_context_operations rdt_fs_context_ops = {
2805 .free = rdt_fs_context_free,
2806 .parse_param = rdt_parse_param,
2807 .get_tree = rdt_get_tree,
2808 };
2809
2810 static int rdt_init_fs_context(struct fs_context *fc)
2811 {
2812 struct rdt_fs_context *ctx;
2813
2814 ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
2815 if (!ctx)
2816 return -ENOMEM;
2817
2818 ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
2819 fc->fs_private = &ctx->kfc;
2820 fc->ops = &rdt_fs_context_ops;
2821 put_user_ns(fc->user_ns);
2822 fc->user_ns = get_user_ns(&init_user_ns);
2823 fc->global = true;
2824 return 0;
2825 }
2826
2827 static int reset_all_ctrls(struct rdt_resource *r)
2828 {
2829 struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
2830 struct rdt_hw_ctrl_domain *hw_dom;
2831 struct msr_param msr_param;
2832 struct rdt_ctrl_domain *d;
2833 int i;
2834
2835 /* Walking r->domains, ensure it can't race with cpuhp */
2836 lockdep_assert_cpus_held();
2837
2838 msr_param.res = r;
2839 msr_param.low = 0;
2840 msr_param.high = hw_res->num_closid;
2841
2842 /*
2843 * Disable resource control for this resource by setting all
2844 * CBMs in all ctrl_domains to the maximum mask value. Pick one CPU
2845 * from each domain to update the MSRs below.
2846 */
2847 list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
2848 hw_dom = resctrl_to_arch_ctrl_dom(d);
2849
2850 for (i = 0; i < hw_res->num_closid; i++)
2851 hw_dom->ctrl_val[i] = r->default_ctrl;
2852 msr_param.dom = d;
2853 smp_call_function_any(&d->hdr.cpu_mask, rdt_ctrl_update, &msr_param, 1);
2854 }
2855
2856 return 0;
2857 }
2858
2859 /*
2860 * Move tasks from one to the other group. If @from is NULL, then all tasks
2861 * in the system are moved unconditionally (used for teardown).
2862 *
2863 * If @mask is not NULL the cpus on which moved tasks are running are set
2864 * in that mask so the update smp function call is restricted to affected
2865 * cpus.
2866 */
2867 static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
2868 struct cpumask *mask)
2869 {
2870 struct task_struct *p, *t;
2871
2872 read_lock(&tasklist_lock);
2873 for_each_process_thread(p, t) {
2874 if (!from || is_closid_match(t, from) ||
2875 is_rmid_match(t, from)) {
2876 resctrl_arch_set_closid_rmid(t, to->closid,
2877 to->mon.rmid);
2878
2879 /*
2880 * Order the closid/rmid stores above before the loads
2881 * in task_curr(). This pairs with the full barrier
2882 * between the rq->curr update and resctrl_sched_in()
2883 * during context switch.
2884 */
2885 smp_mb();
2886
2887 /*
2888 * If the task is on a CPU, set the CPU in the mask.
2889 * The detection is inaccurate as tasks might move or
2890 * schedule before the smp function call takes place.
2891 * In such a case the function call is pointless, but
2892 * there is no other side effect.
2893 */
2894 if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t))
2895 cpumask_set_cpu(task_cpu(t), mask);
2896 }
2897 }
2898 read_unlock(&tasklist_lock);
2899 }
2900
2901 static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
2902 {
2903 struct rdtgroup *sentry, *stmp;
2904 struct list_head *head;
2905
2906 head = &rdtgrp->mon.crdtgrp_list;
2907 list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
2908 free_rmid(sentry->closid, sentry->mon.rmid);
2909 list_del(&sentry->mon.crdtgrp_list);
2910
2911 if (atomic_read(&sentry->waitcount) != 0)
2912 sentry->flags = RDT_DELETED;
2913 else
2914 rdtgroup_remove(sentry);
2915 }
2916 }
2917
2918 /*
2919 * Forcibly remove all subdirectories under root.
2920 */
2921 static void rmdir_all_sub(void)
2922 {
2923 struct rdtgroup *rdtgrp, *tmp;
2924
2925 /* Move all tasks to the default resource group */
2926 rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);
2927
2928 list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
2929 /* Free any child rmids */
2930 free_all_child_rdtgrp(rdtgrp);
2931
2932 /* Remove each rdtgroup other than root */
2933 if (rdtgrp == &rdtgroup_default)
2934 continue;
2935
2936 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2937 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
2938 rdtgroup_pseudo_lock_remove(rdtgrp);
2939
2940 /*
2941 * Give any CPUs back to the default group. We cannot copy
2942 * cpu_online_mask because a CPU might have executed the
2943 * offline callback already, but is still marked online.
2944 */
2945 cpumask_or(&rdtgroup_default.cpu_mask,
2946 &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
2947
2948 free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
2949
2950 kernfs_remove(rdtgrp->kn);
2951 list_del(&rdtgrp->rdtgroup_list);
2952
2953 if (atomic_read(&rdtgrp->waitcount) != 0)
2954 rdtgrp->flags = RDT_DELETED;
2955 else
2956 rdtgroup_remove(rdtgrp);
2957 }
2958 /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
2959 update_closid_rmid(cpu_online_mask, &rdtgroup_default);
2960
2961 kernfs_remove(kn_info);
2962 kernfs_remove(kn_mongrp);
2963 kernfs_remove(kn_mondata);
2964 }
2965
2966 static void rdt_kill_sb(struct super_block *sb)
2967 {
2968 struct rdt_resource *r;
2969
2970 cpus_read_lock();
2971 mutex_lock(&rdtgroup_mutex);
2972
2973 rdt_disable_ctx();
2974
2975 /* Put everything back to default values. */
2976 for_each_alloc_capable_rdt_resource(r)
2977 reset_all_ctrls(r);
2978 rmdir_all_sub();
2979 rdt_pseudo_lock_release();
2980 rdtgroup_default.mode = RDT_MODE_SHAREABLE;
2981 schemata_list_destroy();
2982 rdtgroup_destroy_root();
2983 if (resctrl_arch_alloc_capable())
2984 resctrl_arch_disable_alloc();
2985 if (resctrl_arch_mon_capable())
2986 resctrl_arch_disable_mon();
2987 resctrl_mounted = false;
2988 kernfs_kill_sb(sb);
2989 mutex_unlock(&rdtgroup_mutex);
2990 cpus_read_unlock();
2991 }
2992
2993 static struct file_system_type rdt_fs_type = {
2994 .name = "resctrl",
2995 .init_fs_context = rdt_init_fs_context,
2996 .parameters = rdt_fs_parameters,
2997 .kill_sb = rdt_kill_sb,
2998 };
2999
3000 static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
3001 void *priv)
3002 {
3003 struct kernfs_node *kn;
3004 int ret = 0;
3005
3006 kn = __kernfs_create_file(parent_kn, name, 0444,
3007 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
3008 &kf_mondata_ops, priv, NULL, NULL);
3009 if (IS_ERR(kn))
3010 return PTR_ERR(kn);
3011
3012 ret = rdtgroup_kn_set_ugid(kn);
3013 if (ret) {
3014 kernfs_remove(kn);
3015 return ret;
3016 }
3017
3018 return ret;
3019 }
3020
3021 static void mon_rmdir_one_subdir(struct kernfs_node *pkn, char *name, char *subname)
3022 {
3023 struct kernfs_node *kn;
3024
3025 kn = kernfs_find_and_get(pkn, name);
3026 if (!kn)
3027 return;
3028 kernfs_put(kn);
3029
3030 if (kn->dir.subdirs <= 1)
3031 kernfs_remove(kn);
3032 else
3033 kernfs_remove_by_name(kn, subname);
3034 }
3035
3036 /*
3037 * Remove all subdirectories of mon_data of ctrl_mon groups
3038 * and monitor groups for the given domain.
3039 * Remove files and directories containing the "sum" of domain data
3040 * when the last domain being summed is removed.
3041 */
3042 static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
3043 struct rdt_mon_domain *d)
3044 {
3045 struct rdtgroup *prgrp, *crgrp;
3046 char subname[32];
3047 bool snc_mode;
3048 char name[32];
3049
3050 snc_mode = r->mon_scope == RESCTRL_L3_NODE;
3051 sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
3052 if (snc_mode)
3053 sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id);
3054
3055 list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
3056 mon_rmdir_one_subdir(prgrp->mon.mon_data_kn, name, subname);
3057
3058 list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
3059 mon_rmdir_one_subdir(crgrp->mon.mon_data_kn, name, subname);
3060 }
3061 }
3062
3063 static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
3064 struct rdt_resource *r, struct rdtgroup *prgrp,
3065 bool do_sum)
3066 {
3067 struct rmid_read rr = {0};
3068 union mon_data_bits priv;
3069 struct mon_evt *mevt;
3070 int ret;
3071
3072 if (WARN_ON(list_empty(&r->evt_list)))
3073 return -EPERM;
3074
3075 priv.u.rid = r->rid;
3076 priv.u.domid = do_sum ? d->ci->id : d->hdr.id;
3077 priv.u.sum = do_sum;
3078 list_for_each_entry(mevt, &r->evt_list, list) {
3079 priv.u.evtid = mevt->evtid;
3080 ret = mon_addfile(kn, mevt->name, priv.priv);
3081 if (ret)
3082 return ret;
3083
3084 if (!do_sum && is_mbm_event(mevt->evtid))
3085 mon_event_read(&rr, r, d, prgrp, &d->hdr.cpu_mask, mevt->evtid, true);
3086 }
3087
3088 return 0;
3089 }
3090
3091 static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
3092 struct rdt_mon_domain *d,
3093 struct rdt_resource *r, struct rdtgroup *prgrp)
3094 {
3095 struct kernfs_node *kn, *ckn;
3096 char name[32];
3097 bool snc_mode;
3098 int ret = 0;
3099
3100 lockdep_assert_held(&rdtgroup_mutex);
3101
3102 snc_mode = r->mon_scope == RESCTRL_L3_NODE;
3103 sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
3104 kn = kernfs_find_and_get(parent_kn, name);
3105 if (kn) {
3106 /*
3107 * rdtgroup_mutex will prevent this directory from being
3108 * removed. No need to keep this hold.
3109 */
3110 kernfs_put(kn);
3111 } else {
3112 kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
3113 if (IS_ERR(kn))
3114 return PTR_ERR(kn);
3115
3116 ret = rdtgroup_kn_set_ugid(kn);
3117 if (ret)
3118 goto out_destroy;
3119 ret = mon_add_all_files(kn, d, r, prgrp, snc_mode);
3120 if (ret)
3121 goto out_destroy;
3122 }
3123
3124 if (snc_mode) {
3125 sprintf(name, "mon_sub_%s_%02d", r->name, d->hdr.id);
3126 ckn = kernfs_create_dir(kn, name, parent_kn->mode, prgrp);
3127 if (IS_ERR(ckn)) {
3128 ret = -EINVAL;
3129 goto out_destroy;
3130 }
3131
3132 ret = rdtgroup_kn_set_ugid(ckn);
3133 if (ret)
3134 goto out_destroy;
3135
3136 ret = mon_add_all_files(ckn, d, r, prgrp, false);
3137 if (ret)
3138 goto out_destroy;
3139 }
3140
3141 kernfs_activate(kn);
3142 return 0;
3143
3144 out_destroy:
3145 kernfs_remove(kn);
3146 return ret;
3147 }
3148
3149 /*
3150 * Add all subdirectories of mon_data for "ctrl_mon" groups
3151 * and "monitor" groups with given domain id.
3152 */
3153 static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
3154 struct rdt_mon_domain *d)
3155 {
3156 struct kernfs_node *parent_kn;
3157 struct rdtgroup *prgrp, *crgrp;
3158 struct list_head *head;
3159
3160 list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
3161 parent_kn = prgrp->mon.mon_data_kn;
3162 mkdir_mondata_subdir(parent_kn, d, r, prgrp);
3163
3164 head = &prgrp->mon.crdtgrp_list;
3165 list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
3166 parent_kn = crgrp->mon.mon_data_kn;
3167 mkdir_mondata_subdir(parent_kn, d, r, crgrp);
3168 }
3169 }
3170 }
3171
3172 static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
3173 struct rdt_resource *r,
3174 struct rdtgroup *prgrp)
3175 {
3176 struct rdt_mon_domain *dom;
3177 int ret;
3178
3179 /* Walking r->domains, ensure it can't race with cpuhp */
3180 lockdep_assert_cpus_held();
3181
3182 list_for_each_entry(dom, &r->mon_domains, hdr.list) {
3183 ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
3184 if (ret)
3185 return ret;
3186 }
3187
3188 return 0;
3189 }
3190
3191 /*
3192 * This creates a directory mon_data which contains the monitored data.
3193 *
3194 * mon_data has one directory for each domain, each named
3195 * in the format mon_<domain_name>_<domain_id>. For example, mon_data
3196 * for an L3 domain looks as below:
3197 * ./mon_data:
3198 * mon_L3_00
3199 * mon_L3_01
3200 * mon_L3_02
3201 * ...
3202 *
3203 * Each domain directory has one file per event:
3204 * ./mon_L3_00/:
3205 * llc_occupancy
3206 *
3207 */
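/*
 * When Sub-NUMA Clustering is in use (r->mon_scope == RESCTRL_L3_NODE)
 * each mon_<resource>_<id> directory additionally contains per-node
 * mon_sub_<resource>_<id> subdirectories holding the unsummed counts,
 * e.g. (ids are illustrative only):
 * ./mon_data/mon_L3_00:
 *	mon_sub_L3_00
 *	mon_sub_L3_01
 */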
3208 static int mkdir_mondata_all(struct kernfs_node *parent_kn,
3209 struct rdtgroup *prgrp,
3210 struct kernfs_node **dest_kn)
3211 {
3212 struct rdt_resource *r;
3213 struct kernfs_node *kn;
3214 int ret;
3215
3216 /*
3217 * Create the mon_data directory first.
3218 */
3219 ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn);
3220 if (ret)
3221 return ret;
3222
3223 if (dest_kn)
3224 *dest_kn = kn;
3225
3226 /*
3227 * Create the subdirectories for each domain. Note that all events
3228 * in a domain like L3 are grouped into a resource whose domain is L3.
3229 */
3230 for_each_mon_capable_rdt_resource(r) {
3231 ret = mkdir_mondata_subdir_alldom(kn, r, prgrp);
3232 if (ret)
3233 goto out_destroy;
3234 }
3235
3236 return 0;
3237
3238 out_destroy:
3239 kernfs_remove(kn);
3240 return ret;
3241 }
3242
3243 /**
3244 * cbm_ensure_valid - Enforce validity on provided CBM
3245 * @_val: Candidate CBM
3246 * @r: RDT resource to which the CBM belongs
3247 *
3248 * The provided CBM represents all cache portions available for use. This
3249 * may be represented by a bitmap that does not consist of contiguous ones
3250 * and thus be an invalid CBM.
3251 * Here the provided CBM is forced to be a valid CBM by only considering
3252 * the first set of contiguous bits as valid and clearing all other bits.
3253 * The intention here is to provide a valid default CBM with which a new
3254 * resource group is initialized. The user can follow this with a
3255 * modification to the CBM if the default does not satisfy the
3256 * requirements.
3257 */
3258 static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
3259 {
3260 unsigned int cbm_len = r->cache.cbm_len;
3261 unsigned long first_bit, zero_bit;
3262 unsigned long val = _val;
3263
3264 if (!val)
3265 return 0;
3266
3267 first_bit = find_first_bit(&val, cbm_len);
3268 zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
3269
3270 /* Clear any remaining bits to ensure contiguous region */
3271 bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
3272 return (u32)val;
3273 }
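/*
 * Worked example (illustrative): for a 16 bit CBM of 0xf0f0,
 * find_first_bit() returns 4 and find_next_zero_bit() returns 8, so bits
 * 8..15 are cleared and 0x00f0 - the first contiguous run of set bits -
 * is returned.
 */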
3274
3275 /*
3276 * Initialize cache resources per RDT domain
3277 *
3278 * Set the RDT domain up to start off with all usable allocations. That is,
3279 * all shareable and unused bits. All-zero CBM is invalid.
3280 */
3281 static int __init_one_rdt_domain(struct rdt_ctrl_domain *d, struct resctrl_schema *s,
3282 u32 closid)
3283 {
3284 enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
3285 enum resctrl_conf_type t = s->conf_type;
3286 struct resctrl_staged_config *cfg;
3287 struct rdt_resource *r = s->res;
3288 u32 used_b = 0, unused_b = 0;
3289 unsigned long tmp_cbm;
3290 enum rdtgrp_mode mode;
3291 u32 peer_ctl, ctrl_val;
3292 int i;
3293
3294 cfg = &d->staged_config[t];
3295 cfg->have_new_ctrl = false;
3296 cfg->new_ctrl = r->cache.shareable_bits;
3297 used_b = r->cache.shareable_bits;
3298 for (i = 0; i < closids_supported(); i++) {
3299 if (closid_allocated(i) && i != closid) {
3300 mode = rdtgroup_mode_by_closid(i);
3301 if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
3302 /*
3303 * ctrl values for locksetup aren't relevant
3304 * until the schemata is written, and the mode
3305 * becomes RDT_MODE_PSEUDO_LOCKED.
3306 */
3307 continue;
3308 /*
3309 * If CDP is active include peer domain's
3310 * usage to ensure there is no overlap
3311 * with an exclusive group.
3312 */
3313 if (resctrl_arch_get_cdp_enabled(r->rid))
3314 peer_ctl = resctrl_arch_get_config(r, d, i,
3315 peer_type);
3316 else
3317 peer_ctl = 0;
3318 ctrl_val = resctrl_arch_get_config(r, d, i,
3319 s->conf_type);
3320 used_b |= ctrl_val | peer_ctl;
3321 if (mode == RDT_MODE_SHAREABLE)
3322 cfg->new_ctrl |= ctrl_val | peer_ctl;
3323 }
3324 }
3325 if (d->plr && d->plr->cbm > 0)
3326 used_b |= d->plr->cbm;
3327 unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
3328 unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
3329 cfg->new_ctrl |= unused_b;
3330 /*
3331 * Force the initial CBM to be valid; the user can
3332 * modify the CBM based on system availability.
3333 */
3334 cfg->new_ctrl = cbm_ensure_valid(cfg->new_ctrl, r);
3335 /*
3336 * Assign the u32 CBM to an unsigned long to ensure that
3337 * bitmap_weight() does not access out-of-bound memory.
3338 */
3339 tmp_cbm = cfg->new_ctrl;
3340 if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
3341 rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->hdr.id);
3342 return -ENOSPC;
3343 }
3344 cfg->have_new_ctrl = true;
3345
3346 return 0;
3347 }
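/*
 * Worked example of the unused bit computation above (illustrative
 * numbers): with an 11 bit CBM, if the shareable bits plus the other
 * groups' configurations give used_b = 0x03f, then
 *
 *	unused_b = used_b ^ (BIT_MASK(11) - 1) = 0x7c0
 *
 * and those unused portions are ORed into the new group's CBM before
 * cbm_ensure_valid() trims it down to a contiguous run.
 */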
3348
3349 /*
3350 * Initialize cache resources with default values.
3351 *
3352 * A new RDT group is being created on an allocation capable (CAT)
3353 * supporting system. Set this group up to start off with all usable
3354 * allocations.
3355 *
3356 * If there are no more shareable bits available on any domain then
3357 * the entire allocation will fail.
3358 */
3359 static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
3360 {
3361 struct rdt_ctrl_domain *d;
3362 int ret;
3363
3364 list_for_each_entry(d, &s->res->ctrl_domains, hdr.list) {
3365 ret = __init_one_rdt_domain(d, s, closid);
3366 if (ret < 0)
3367 return ret;
3368 }
3369
3370 return 0;
3371 }
3372
3373 /* Initialize MBA resource with default values. */
3374 static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid)
3375 {
3376 struct resctrl_staged_config *cfg;
3377 struct rdt_ctrl_domain *d;
3378
3379 list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
3380 if (is_mba_sc(r)) {
3381 d->mbps_val[closid] = MBA_MAX_MBPS;
3382 continue;
3383 }
3384
3385 cfg = &d->staged_config[CDP_NONE];
3386 cfg->new_ctrl = r->default_ctrl;
3387 cfg->have_new_ctrl = true;
3388 }
3389 }
3390
3391 /* Initialize the RDT group's allocations. */
3392 static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
3393 {
3394 struct resctrl_schema *s;
3395 struct rdt_resource *r;
3396 int ret = 0;
3397
3398 rdt_staged_configs_clear();
3399
3400 list_for_each_entry(s, &resctrl_schema_all, list) {
3401 r = s->res;
3402 if (r->rid == RDT_RESOURCE_MBA ||
3403 r->rid == RDT_RESOURCE_SMBA) {
3404 rdtgroup_init_mba(r, rdtgrp->closid);
3405 if (is_mba_sc(r))
3406 continue;
3407 } else {
3408 ret = rdtgroup_init_cat(s, rdtgrp->closid);
3409 if (ret < 0)
3410 goto out;
3411 }
3412
3413 ret = resctrl_arch_update_domains(r, rdtgrp->closid);
3414 if (ret < 0) {
3415 rdt_last_cmd_puts("Failed to initialize allocations\n");
3416 goto out;
3417 }
3418
3419 }
3420
3421 rdtgrp->mode = RDT_MODE_SHAREABLE;
3422
3423 out:
3424 rdt_staged_configs_clear();
3425 return ret;
3426 }
3427
3428 static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp)
3429 {
3430 int ret;
3431
3432 if (!resctrl_arch_mon_capable())
3433 return 0;
3434
3435 ret = alloc_rmid(rdtgrp->closid);
3436 if (ret < 0) {
3437 rdt_last_cmd_puts("Out of RMIDs\n");
3438 return ret;
3439 }
3440 rdtgrp->mon.rmid = ret;
3441
3442 ret = mkdir_mondata_all(rdtgrp->kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
3443 if (ret) {
3444 rdt_last_cmd_puts("kernfs subdir error\n");
3445 free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
3446 return ret;
3447 }
3448
3449 return 0;
3450 }
3451
3452 static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp)
3453 {
3454 if (resctrl_arch_mon_capable())
3455 free_rmid(rgrp->closid, rgrp->mon.rmid);
3456 }
3457
3458 static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
3459 const char *name, umode_t mode,
3460 enum rdt_group_type rtype, struct rdtgroup **r)
3461 {
3462 struct rdtgroup *prdtgrp, *rdtgrp;
3463 unsigned long files = 0;
3464 struct kernfs_node *kn;
3465 int ret;
3466
3467 prdtgrp = rdtgroup_kn_lock_live(parent_kn);
3468 if (!prdtgrp) {
3469 ret = -ENODEV;
3470 goto out_unlock;
3471 }
3472
3473 if (rtype == RDTMON_GROUP &&
3474 (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
3475 prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) {
3476 ret = -EINVAL;
3477 rdt_last_cmd_puts("Pseudo-locking in progress\n");
3478 goto out_unlock;
3479 }
3480
3481 /* allocate the rdtgroup. */
3482 rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
3483 if (!rdtgrp) {
3484 ret = -ENOSPC;
3485 rdt_last_cmd_puts("Kernel out of memory\n");
3486 goto out_unlock;
3487 }
3488 *r = rdtgrp;
3489 rdtgrp->mon.parent = prdtgrp;
3490 rdtgrp->type = rtype;
3491 INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list);
3492
3493 /* kernfs creates the directory for rdtgrp */
3494 kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
3495 if (IS_ERR(kn)) {
3496 ret = PTR_ERR(kn);
3497 rdt_last_cmd_puts("kernfs create error\n");
3498 goto out_free_rgrp;
3499 }
3500 rdtgrp->kn = kn;
3501
3502 /*
3503 * kernfs_remove() will drop the reference count on "kn" which
3504 * will free it. But we still need it to stick around for the
3505 * rdtgroup_kn_unlock(kn) call. Take one extra reference here,
3506 * which will be dropped by kernfs_put() in rdtgroup_remove().
3507 */
3508 kernfs_get(kn);
3509
3510 ret = rdtgroup_kn_set_ugid(kn);
3511 if (ret) {
3512 rdt_last_cmd_puts("kernfs perm error\n");
3513 goto out_destroy;
3514 }
3515
3516 if (rtype == RDTCTRL_GROUP) {
3517 files = RFTYPE_BASE | RFTYPE_CTRL;
3518 if (resctrl_arch_mon_capable())
3519 files |= RFTYPE_MON;
3520 } else {
3521 files = RFTYPE_BASE | RFTYPE_MON;
3522 }
3523
3524 ret = rdtgroup_add_files(kn, files);
3525 if (ret) {
3526 rdt_last_cmd_puts("kernfs fill error\n");
3527 goto out_destroy;
3528 }
3529
3530 /*
3531 * The caller unlocks the parent_kn upon success.
3532 */
3533 return 0;
3534
3535 out_destroy:
3536 kernfs_put(rdtgrp->kn);
3537 kernfs_remove(rdtgrp->kn);
3538 out_free_rgrp:
3539 kfree(rdtgrp);
3540 out_unlock:
3541 rdtgroup_kn_unlock(parent_kn);
3542 return ret;
3543 }
3544
3545 static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
3546 {
3547 kernfs_remove(rgrp->kn);
3548 rdtgroup_remove(rgrp);
3549 }
3550
3551 /*
3552 * Create a monitor group under the "mon_groups" directory of a control
3553 * and monitor group (ctrl_mon). This is a resource group
3554 * to monitor a subset of tasks and cpus in its parent ctrl_mon group.
3555 */
3556 static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
3557 const char *name, umode_t mode)
3558 {
3559 struct rdtgroup *rdtgrp, *prgrp;
3560 int ret;
3561
3562 ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTMON_GROUP, &rdtgrp);
3563 if (ret)
3564 return ret;
3565
3566 prgrp = rdtgrp->mon.parent;
3567 rdtgrp->closid = prgrp->closid;
3568
3569 ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
3570 if (ret) {
3571 mkdir_rdt_prepare_clean(rdtgrp);
3572 goto out_unlock;
3573 }
3574
3575 kernfs_activate(rdtgrp->kn);
3576
3577 /*
3578 * Add the rdtgrp to the list of rdtgrps the parent
3579 * ctrl_mon group has to track.
3580 */
3581 list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
3582
3583 out_unlock:
3584 rdtgroup_kn_unlock(parent_kn);
3585 return ret;
3586 }
3587
3588 /*
3589 * These are rdtgroups created under the root directory. Can be used
3590 * to allocate and monitor resources.
3591 */
3592 static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
3593 const char *name, umode_t mode)
3594 {
3595 struct rdtgroup *rdtgrp;
3596 struct kernfs_node *kn;
3597 u32 closid;
3598 int ret;
3599
3600 ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTCTRL_GROUP, &rdtgrp);
3601 if (ret)
3602 return ret;
3603
3604 kn = rdtgrp->kn;
3605 ret = closid_alloc();
3606 if (ret < 0) {
3607 rdt_last_cmd_puts("Out of CLOSIDs\n");
3608 goto out_common_fail;
3609 }
3610 closid = ret;
3611 ret = 0;
3612
3613 rdtgrp->closid = closid;
3614
3615 ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
3616 if (ret)
3617 goto out_closid_free;
3618
3619 kernfs_activate(rdtgrp->kn);
3620
3621 ret = rdtgroup_init_alloc(rdtgrp);
3622 if (ret < 0)
3623 goto out_rmid_free;
3624
3625 list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
3626
3627 if (resctrl_arch_mon_capable()) {
3628 /*
3629 * Create an empty mon_groups directory to hold the subset
3630 * of tasks and cpus to monitor.
3631 */
3632 ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL);
3633 if (ret) {
3634 rdt_last_cmd_puts("kernfs subdir error\n");
3635 goto out_del_list;
3636 }
3637 if (is_mba_sc(NULL))
3638 rdtgrp->mba_mbps_event = mba_mbps_default_event;
3639 }
3640
3641 goto out_unlock;
3642
3643 out_del_list:
3644 list_del(&rdtgrp->rdtgroup_list);
3645 out_rmid_free:
3646 mkdir_rdt_prepare_rmid_free(rdtgrp);
3647 out_closid_free:
3648 closid_free(closid);
3649 out_common_fail:
3650 mkdir_rdt_prepare_clean(rdtgrp);
3651 out_unlock:
3652 rdtgroup_kn_unlock(parent_kn);
3653 return ret;
3654 }
3655
3656 /*
3657 * We allow creating mon groups only within a directory called "mon_groups"
3658 * which is present in every ctrl_mon group. Check if this is a valid
3659 * "mon_groups" directory.
3660 *
3661 * 1. The directory should be named "mon_groups".
3662 * 2. The mon group itself should "not" be named "mon_groups".
3663 * This makes sure the "mon_groups" directory always has a ctrl_mon group
3664 * as parent.
3665 */
3666 static bool is_mon_groups(struct kernfs_node *kn, const char *name)
3667 {
3668 return (!strcmp(kn->name, "mon_groups") &&
3669 strcmp(name, "mon_groups"));
3670 }
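/*
 * Example of the resulting mkdir rules (illustrative paths, assuming the
 * usual /sys/fs/resctrl mount point):
 *
 *	# mkdir /sys/fs/resctrl/g1                <- new ctrl_mon group
 *	# mkdir /sys/fs/resctrl/g1/mon_groups/m1  <- new mon group
 *	# mkdir /sys/fs/resctrl/g1/m1             <- rejected with -EPERM
 *
 * Only the root directory and "mon_groups" directories accept new
 * resource group directories; see rdtgroup_mkdir() below.
 */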
3671
3672 static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
3673 umode_t mode)
3674 {
3675 /* Do not accept '\n' to avoid unparsable situation. */
3676 if (strchr(name, '\n'))
3677 return -EINVAL;
3678
3679 /*
3680 * If the parent directory is the root directory and RDT
3681 * allocation is supported, add a control and monitoring
3682 * subdirectory.
3683 */
3684 if (resctrl_arch_alloc_capable() && parent_kn == rdtgroup_default.kn)
3685 return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode);
3686
3687 /*
3688 * If RDT monitoring is supported and the parent directory is a valid
3689 * "mon_groups" directory, add a monitoring subdirectory.
3690 */
3691 if (resctrl_arch_mon_capable() && is_mon_groups(parent_kn, name))
3692 return rdtgroup_mkdir_mon(parent_kn, name, mode);
3693
3694 return -EPERM;
3695 }
3696
3697 static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
3698 {
3699 struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
3700 int cpu;
3701
3702 /* Give any tasks back to the parent group */
3703 rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask);
3704
3705 /* Update per cpu rmid of the moved CPUs first */
3706 for_each_cpu(cpu, &rdtgrp->cpu_mask)
3707 per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid;
3708 /*
3709 * Update the MSR on moved CPUs and CPUs which have a moved
3710 * task running on them.
3711 */
3712 cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
3713 update_closid_rmid(tmpmask, NULL);
3714
3715 rdtgrp->flags = RDT_DELETED;
3716 free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
3717
3718 /*
3719 * Remove the rdtgrp from the parent ctrl_mon group's list
3720 */
3721 WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
3722 list_del(&rdtgrp->mon.crdtgrp_list);
3723
3724 kernfs_remove(rdtgrp->kn);
3725
3726 return 0;
3727 }
3728
3729 static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp)
3730 {
3731 rdtgrp->flags = RDT_DELETED;
3732 list_del(&rdtgrp->rdtgroup_list);
3733
3734 kernfs_remove(rdtgrp->kn);
3735 return 0;
3736 }
3737
3738 static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
3739 {
3740 int cpu;
3741
3742 /* Give any tasks back to the default group */
3743 rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);
3744
3745 /* Give any CPUs back to the default group */
3746 cpumask_or(&rdtgroup_default.cpu_mask,
3747 &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
3748
3749 /* Update per cpu closid and rmid of the moved CPUs first */
3750 for_each_cpu(cpu, &rdtgrp->cpu_mask) {
3751 per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid;
3752 per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid;
3753 }
3754
3755 /*
3756 * Update the MSR on moved CPUs and CPUs which have a moved
3757 * task running on them.
3758 */
3759 cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
3760 update_closid_rmid(tmpmask, NULL);
3761
3762 free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
3763 closid_free(rdtgrp->closid);
3764
3765 rdtgroup_ctrl_remove(rdtgrp);
3766
3767 /*
3768 * Free all the child monitor group rmids.
3769 */
3770 free_all_child_rdtgrp(rdtgrp);
3771
3772 return 0;
3773 }
3774
3775 static int rdtgroup_rmdir(struct kernfs_node *kn)
3776 {
3777 struct kernfs_node *parent_kn = kn->parent;
3778 struct rdtgroup *rdtgrp;
3779 cpumask_var_t tmpmask;
3780 int ret = 0;
3781
3782 if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
3783 return -ENOMEM;
3784
3785 rdtgrp = rdtgroup_kn_lock_live(kn);
3786 if (!rdtgrp) {
3787 ret = -EPERM;
3788 goto out;
3789 }
3790
3791 /*
3792 * If the rdtgroup is a ctrl_mon group and parent directory
3793 * is the root directory, remove the ctrl_mon group.
3794 *
3795 * If the rdtgroup is a mon group and parent directory
3796 * is a valid "mon_groups" directory, remove the mon group.
3797 */
3798 if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn &&
3799 rdtgrp != &rdtgroup_default) {
3800 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
3801 rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
3802 ret = rdtgroup_ctrl_remove(rdtgrp);
3803 } else {
3804 ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask);
3805 }
3806 } else if (rdtgrp->type == RDTMON_GROUP &&
3807 is_mon_groups(parent_kn, kn->name)) {
3808 ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask);
3809 } else {
3810 ret = -EPERM;
3811 }
3812
3813 out:
3814 rdtgroup_kn_unlock(kn);
3815 free_cpumask_var(tmpmask);
3816 return ret;
3817 }
3818
/**
 * mongrp_reparent() - replace parent CTRL_MON group of a MON group
 * @rdtgrp:		the MON group whose parent should be replaced
 * @new_prdtgrp:	replacement parent CTRL_MON group for @rdtgrp
 * @cpus:		cpumask provided by the caller for use during this call
 *
 * Replaces the parent CTRL_MON group for a MON group, resulting in all member
 * tasks' CLOSID immediately changing to that of the new parent group.
 * Monitoring data for the group is unaffected by this operation.
 */
static void mongrp_reparent(struct rdtgroup *rdtgrp,
			    struct rdtgroup *new_prdtgrp,
			    cpumask_var_t cpus)
{
	struct rdtgroup *prdtgrp = rdtgrp->mon.parent;

	WARN_ON(rdtgrp->type != RDTMON_GROUP);
	WARN_ON(new_prdtgrp->type != RDTCTRL_GROUP);

	/* Nothing to do when simply renaming a MON group. */
	if (prdtgrp == new_prdtgrp)
		return;

	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
	list_move_tail(&rdtgrp->mon.crdtgrp_list,
		       &new_prdtgrp->mon.crdtgrp_list);

	rdtgrp->mon.parent = new_prdtgrp;
	rdtgrp->closid = new_prdtgrp->closid;

	/* Propagate updated closid to all tasks in this group. */
	rdt_move_group_tasks(rdtgrp, rdtgrp, cpus);

	update_closid_rmid(cpus, NULL);
}

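/*
 * Illustrative user-space view of the rename callback below (example paths,
 * assuming resctrl is mounted at /sys/fs/resctrl): the only supported move
 * is a MON group into another CTRL_MON group's "mon_groups" directory, e.g.
 *
 *   mv /sys/fs/resctrl/p0/mon_groups/m0 /sys/fs/resctrl/p1/mon_groups/m0
 *
 * which switches the tasks in m0 over to p1's CLOSID while keeping their
 * RMID, so monitoring data is preserved across the move.
 */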
static int rdtgroup_rename(struct kernfs_node *kn,
			   struct kernfs_node *new_parent, const char *new_name)
{
	struct rdtgroup *new_prdtgrp;
	struct rdtgroup *rdtgrp;
	cpumask_var_t tmpmask;
	int ret;

	rdtgrp = kernfs_to_rdtgroup(kn);
	new_prdtgrp = kernfs_to_rdtgroup(new_parent);
	if (!rdtgrp || !new_prdtgrp)
		return -ENOENT;

	/* Release both kernfs active_refs before obtaining rdtgroup mutex. */
	rdtgroup_kn_get(rdtgrp, kn);
	rdtgroup_kn_get(new_prdtgrp, new_parent);

	mutex_lock(&rdtgroup_mutex);

	rdt_last_cmd_clear();

	/*
	 * Don't allow kernfs_to_rdtgroup() to return a parent rdtgroup if
	 * either kernfs_node is a file.
	 */
	if (kernfs_type(kn) != KERNFS_DIR ||
	    kernfs_type(new_parent) != KERNFS_DIR) {
		rdt_last_cmd_puts("Source and destination must be directories\n");
		ret = -EPERM;
		goto out;
	}

	if ((rdtgrp->flags & RDT_DELETED) || (new_prdtgrp->flags & RDT_DELETED)) {
		ret = -ENOENT;
		goto out;
	}

	if (rdtgrp->type != RDTMON_GROUP || !kn->parent ||
	    !is_mon_groups(kn->parent, kn->name)) {
		rdt_last_cmd_puts("Source must be a MON group\n");
		ret = -EPERM;
		goto out;
	}

	if (!is_mon_groups(new_parent, new_name)) {
		rdt_last_cmd_puts("Destination must be a mon_groups subdirectory\n");
		ret = -EPERM;
		goto out;
	}

	/*
	 * If the MON group is monitoring CPUs, the CPUs must be assigned to the
	 * current parent CTRL_MON group and therefore cannot be assigned to
	 * the new parent, making the move illegal.
	 */
	if (!cpumask_empty(&rdtgrp->cpu_mask) &&
	    rdtgrp->mon.parent != new_prdtgrp) {
		rdt_last_cmd_puts("Cannot move a MON group that monitors CPUs\n");
		ret = -EPERM;
		goto out;
	}

	/*
	 * Allocate the cpumask for use in mongrp_reparent() to avoid the
	 * possibility of failing to allocate it after kernfs_rename() has
	 * succeeded.
	 */
	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Perform all input validation and allocations needed to ensure
	 * mongrp_reparent() will succeed before calling kernfs_rename(),
	 * otherwise it would be necessary to revert this call if
	 * mongrp_reparent() failed.
	 */
	ret = kernfs_rename(kn, new_parent, new_name);
	if (!ret)
		mongrp_reparent(rdtgrp, new_prdtgrp, tmpmask);

	free_cpumask_var(tmpmask);

out:
	mutex_unlock(&rdtgroup_mutex);
	rdtgroup_kn_put(rdtgrp, kn);
	rdtgroup_kn_put(new_prdtgrp, new_parent);
	return ret;
}

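/*
 * Emit the currently active mount options (e.g. ",cdp" or ",mba_MBps") so
 * they are visible in /proc/mounts and mount(8) output for the resctrl
 * filesystem.
 */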
static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
{
	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
		seq_puts(seq, ",cdp");

	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
		seq_puts(seq, ",cdpl2");

	if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl))
		seq_puts(seq, ",mba_MBps");

	if (resctrl_debug)
		seq_puts(seq, ",debug");

	return 0;
}

static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
	.mkdir		= rdtgroup_mkdir,
	.rmdir		= rdtgroup_rmdir,
	.rename		= rdtgroup_rename,
	.show_options	= rdtgroup_show_options,
};

static int rdtgroup_setup_root(struct rdt_fs_context *ctx)
{
	rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
				      KERNFS_ROOT_CREATE_DEACTIVATED |
				      KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
				      &rdtgroup_default);
	if (IS_ERR(rdt_root))
		return PTR_ERR(rdt_root);

	ctx->kfc.root = rdt_root;
	rdtgroup_default.kn = kernfs_root_to_node(rdt_root);

	return 0;
}

static void rdtgroup_destroy_root(void)
{
	kernfs_destroy_root(rdt_root);
	rdtgroup_default.kn = NULL;
}

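/*
 * Initialize the default resource group. It owns the reserved CLOSID and
 * RMID, and every task and CPU belongs to it until explicitly moved to
 * another group.
 */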
static void __init rdtgroup_setup_default(void)
{
	mutex_lock(&rdtgroup_mutex);

	rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID;
	rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID;
	rdtgroup_default.type = RDTCTRL_GROUP;
	INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);

	list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);

	mutex_unlock(&rdtgroup_mutex);
}

static void domain_destroy_mon_state(struct rdt_mon_domain *d)
{
	bitmap_free(d->rmid_busy_llc);
	kfree(d->mbm_total);
	kfree(d->mbm_local);
}

void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
{
	mutex_lock(&rdtgroup_mutex);

	if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
		mba_sc_domain_destroy(r, d);

	mutex_unlock(&rdtgroup_mutex);
}

void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
{
	mutex_lock(&rdtgroup_mutex);

	/*
	 * If resctrl is mounted, remove all the
	 * per domain monitor data directories.
	 */
	if (resctrl_mounted && resctrl_arch_mon_capable())
		rmdir_mondata_subdir_allrdtgrp(r, d);

	if (is_mbm_enabled())
		cancel_delayed_work(&d->mbm_over);
	if (is_llc_occupancy_enabled() && has_busy_rmid(d)) {
		/*
		 * When a package is going down, forcefully
		 * decrement rmid->ebusy. There is no way to know
		 * that the L3 was flushed and hence may lead to
		 * incorrect counts in rare scenarios, but leaving
		 * the RMID as busy creates RMID leaks if the
		 * package never comes back.
		 */
		__check_limbo(d, true);
		cancel_delayed_work(&d->cqm_limbo);
	}

	domain_destroy_mon_state(d);

	mutex_unlock(&rdtgroup_mutex);
}

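/*
 * Allocate the per-domain monitoring state: a "busy" RMID bitmap used for
 * LLC occupancy limbo handling and per-RMID counter arrays for the total
 * and local MBM events, each sized by resctrl_arch_system_num_rmid_idx().
 */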
static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d)
{
	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
	size_t tsize;

	if (is_llc_occupancy_enabled()) {
		d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL);
		if (!d->rmid_busy_llc)
			return -ENOMEM;
	}
	if (is_mbm_total_enabled()) {
		tsize = sizeof(*d->mbm_total);
		d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL);
		if (!d->mbm_total) {
			bitmap_free(d->rmid_busy_llc);
			return -ENOMEM;
		}
	}
	if (is_mbm_local_enabled()) {
		tsize = sizeof(*d->mbm_local);
		d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL);
		if (!d->mbm_local) {
			bitmap_free(d->rmid_busy_llc);
			kfree(d->mbm_total);
			return -ENOMEM;
		}
	}

	return 0;
}

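/*
 * A control domain coming online only needs additional state when the MBA
 * software controller ("mba_MBps" mount option) may be used, in which case
 * its per-domain state is allocated here.
 */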
int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
{
	int err = 0;

	mutex_lock(&rdtgroup_mutex);

	if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) {
		/* RDT_RESOURCE_MBA is never mon_capable */
		err = mba_sc_domain_allocate(r, d);
	}

	mutex_unlock(&rdtgroup_mutex);

	return err;
}

int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
{
	int err;

	mutex_lock(&rdtgroup_mutex);

	err = domain_setup_mon_state(r, d);
	if (err)
		goto out_unlock;

	if (is_mbm_enabled()) {
		INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
		mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL,
					   RESCTRL_PICK_ANY_CPU);
	}

	if (is_llc_occupancy_enabled())
		INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);

	/*
	 * If the filesystem is not mounted then only the default resource group
	 * exists. Creation of its directories is deferred until mount time
	 * by rdt_get_tree() calling mkdir_mondata_all().
	 * If resctrl is mounted, add per domain monitor data directories.
	 */
	if (resctrl_mounted && resctrl_arch_mon_capable())
		mkdir_mondata_subdir_allrdtgrp(r, d);

out_unlock:
	mutex_unlock(&rdtgroup_mutex);

	return err;
}

void resctrl_online_cpu(unsigned int cpu)
{
	mutex_lock(&rdtgroup_mutex);
	/* The CPU is set in default rdtgroup after online. */
	cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
	mutex_unlock(&rdtgroup_mutex);
}

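/*
 * Clear @cpu from whichever child MON group of @r still has it set; a CPU
 * can be a member of at most one MON group at a time, so the walk stops at
 * the first match.
 */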
static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
{
	struct rdtgroup *cr;

	list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) {
		if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask))
			break;
	}
}

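/*
 * A CPU is going offline: remove it from its resource group (and any child
 * MON group), and if it was running this L3 domain's MBM overflow or limbo
 * worker, move that work to another CPU in the domain.
 */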
void resctrl_offline_cpu(unsigned int cpu)
{
	struct rdt_resource *l3 = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
	struct rdt_mon_domain *d;
	struct rdtgroup *rdtgrp;

	mutex_lock(&rdtgroup_mutex);
	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
		if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
			clear_childcpus(rdtgrp, cpu);
			break;
		}
	}

	if (!l3->mon_capable)
		goto out_unlock;

	d = get_mon_domain_from_cpu(cpu, l3);
	if (d) {
		if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
			cancel_delayed_work(&d->mbm_over);
			mbm_setup_overflow_handler(d, 0, cpu);
		}
		if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
		    has_busy_rmid(d)) {
			cancel_delayed_work(&d->cqm_limbo);
			cqm_setup_limbo_handler(d, 0, cpu);
		}
	}

out_unlock:
	mutex_unlock(&rdtgroup_mutex);
}

/*
 * rdtgroup_init - rdtgroup initialization
 *
 * Set up the resctrl filesystem: set up the root, create the mount point,
 * register the rdtgroup filesystem, and initialize files under the root
 * directory.
 *
 * Return: 0 on success or -errno
 */
int __init rdtgroup_init(void)
{
	int ret = 0;

	seq_buf_init(&last_cmd_status, last_cmd_status_buf,
		     sizeof(last_cmd_status_buf));

	rdtgroup_setup_default();

	ret = sysfs_create_mount_point(fs_kobj, "resctrl");
	if (ret)
		return ret;

	ret = register_filesystem(&rdt_fs_type);
	if (ret)
		goto cleanup_mountpoint;

	/*
	 * Adding the resctrl debugfs directory here may not be ideal since
	 * it would let the resctrl debugfs directory appear on the debugfs
	 * filesystem before the resctrl filesystem is mounted.
	 * It may also be ok since that would enable debugging of RDT before
	 * resctrl is mounted.
	 * The reason why the debugfs directory is created here and not in
	 * rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and
	 * during the debugfs directory creation also &sb->s_type->i_mutex_key
	 * (the lockdep class of inode->i_rwsem). Other filesystem
	 * interactions (eg. SyS_getdents) have the lock ordering:
	 * &sb->s_type->i_mutex_key --> &mm->mmap_lock
	 * During mmap(), called with &mm->mmap_lock, the rdtgroup_mutex
	 * is taken, thus creating the dependency:
	 * &mm->mmap_lock --> rdtgroup_mutex, which together with the other
	 * two dependencies could lead to deadlock.
	 * By creating the debugfs directory here we avoid a dependency
	 * that may cause deadlock (even though file operations cannot
	 * occur until the filesystem is mounted, but I do not know how to
	 * tell lockdep that).
	 */
	debugfs_resctrl = debugfs_create_dir("resctrl", NULL);

	return 0;

cleanup_mountpoint:
	sysfs_remove_mount_point(fs_kobj, "resctrl");

	return ret;
}

void __exit rdtgroup_exit(void)
{
	debugfs_remove_recursive(debugfs_resctrl);
	unregister_filesystem(&rdt_fs_type);
	sysfs_remove_mount_point(fs_kobj, "resctrl");
}