1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * User interface for Resource Allocation in Resource Director Technology(RDT)
4  *
5  * Copyright (C) 2016 Intel Corporation
6  *
7  * Author: Fenghua Yu <[email protected]>
8  *
9  * More information about RDT can be found in the Intel (R) x86 Architecture
10  * Software Developer Manual.
11  */
12 
13 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
14 
15 #include <linux/cpu.h>
16 #include <linux/debugfs.h>
17 #include <linux/fs.h>
18 #include <linux/fs_parser.h>
19 #include <linux/sysfs.h>
20 #include <linux/kernfs.h>
21 #include <linux/seq_buf.h>
22 #include <linux/seq_file.h>
23 #include <linux/sched/signal.h>
24 #include <linux/sched/task.h>
25 #include <linux/slab.h>
26 #include <linux/task_work.h>
27 #include <linux/user_namespace.h>
28 
29 #include <uapi/linux/magic.h>
30 
31 #include <asm/resctrl.h>
32 #include "internal.h"
33 
34 DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
35 DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
36 DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
37 
38 /* Mutex to protect rdtgroup access. */
39 DEFINE_MUTEX(rdtgroup_mutex);
40 
41 static struct kernfs_root *rdt_root;
42 struct rdtgroup rdtgroup_default;
43 LIST_HEAD(rdt_all_groups);
44 
45 /* list of entries for the schemata file */
46 LIST_HEAD(resctrl_schema_all);
47 
48 /* The filesystem can only be mounted once. */
49 bool resctrl_mounted;
50 
51 /* Kernel fs node for "info" directory under root */
52 static struct kernfs_node *kn_info;
53 
54 /* Kernel fs node for "mon_groups" directory under root */
55 static struct kernfs_node *kn_mongrp;
56 
57 /* Kernel fs node for "mon_data" directory under root */
58 static struct kernfs_node *kn_mondata;
59 
60 static struct seq_buf last_cmd_status;
61 static char last_cmd_status_buf[512];
62 
63 static int rdtgroup_setup_root(struct rdt_fs_context *ctx);
64 static void rdtgroup_destroy_root(void);
65 
66 struct dentry *debugfs_resctrl;
67 
68 /*
69  * Memory bandwidth monitoring event to use for the default CTRL_MON group
70  * and each new CTRL_MON group created by the user.  Only relevant when
71  * the filesystem is mounted with the "mba_MBps" option so it does not
72  * matter that it remains uninitialized on systems that do not support
73  * the "mba_MBps" option.
74  */
75 enum resctrl_event_id mba_mbps_default_event;
76 
77 static bool resctrl_debug;
78 
79 void rdt_last_cmd_clear(void)
80 {
81 	lockdep_assert_held(&rdtgroup_mutex);
82 	seq_buf_clear(&last_cmd_status);
83 }
84 
85 void rdt_last_cmd_puts(const char *s)
86 {
87 	lockdep_assert_held(&rdtgroup_mutex);
88 	seq_buf_puts(&last_cmd_status, s);
89 }
90 
91 void rdt_last_cmd_printf(const char *fmt, ...)
92 {
93 	va_list ap;
94 
95 	va_start(ap, fmt);
96 	lockdep_assert_held(&rdtgroup_mutex);
97 	seq_buf_vprintf(&last_cmd_status, fmt, ap);
98 	va_end(ap);
99 }
100 
101 void rdt_staged_configs_clear(void)
102 {
103 	struct rdt_ctrl_domain *dom;
104 	struct rdt_resource *r;
105 
106 	lockdep_assert_held(&rdtgroup_mutex);
107 
108 	for_each_alloc_capable_rdt_resource(r) {
109 		list_for_each_entry(dom, &r->ctrl_domains, hdr.list)
110 			memset(dom->staged_config, 0, sizeof(dom->staged_config));
111 	}
112 }
113 
114 /*
115  * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
116  * we can keep a bitmap of free CLOSIDs in a single integer.
117  *
118  * Using a global CLOSID across all resources has some advantages and
119  * some drawbacks:
120  * + We can simply set current's closid to assign a task to a resource
121  *   group.
122  * + Context switch code can avoid extra memory references deciding which
123  *   CLOSID to load into the PQR_ASSOC MSR
124  * - We give up some options in configuring resource groups across multi-socket
125  *   systems.
126  * - Our choices on how to configure each resource become progressively more
127  *   limited as the number of resources grows.
128  */
129 static unsigned long closid_free_map;
130 static int closid_free_map_len;
131 
132 int closids_supported(void)
133 {
134 	return closid_free_map_len;
135 }
136 
137 static void closid_init(void)
138 {
139 	struct resctrl_schema *s;
140 	u32 rdt_min_closid = 32;
141 
142 	/* Compute rdt_min_closid across all resources */
143 	list_for_each_entry(s, &resctrl_schema_all, list)
144 		rdt_min_closid = min(rdt_min_closid, s->num_closid);
145 
146 	closid_free_map = BIT_MASK(rdt_min_closid) - 1;
147 
148 	/* RESCTRL_RESERVED_CLOSID is always reserved for the default group */
149 	__clear_bit(RESCTRL_RESERVED_CLOSID, &closid_free_map);
150 	closid_free_map_len = rdt_min_closid;
151 }
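
/*
 * Illustrative example (hypothetical numbers, not from the original
 * sources): on a system where every schema reports 16 CLOSIDs,
 * closid_init() computes rdt_min_closid = 16, sets closid_free_map to
 * BIT_MASK(16) - 1 = 0xffff, then clears bit 0 for
 * RESCTRL_RESERVED_CLOSID, leaving 0xfffe. closid_alloc() below hands
 * out bits 1..15 and closid_free() returns them to the map.
 */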
152 
153 static int closid_alloc(void)
154 {
155 	int cleanest_closid;
156 	u32 closid;
157 
158 	lockdep_assert_held(&rdtgroup_mutex);
159 
160 	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID) &&
161 	    is_llc_occupancy_enabled()) {
162 		cleanest_closid = resctrl_find_cleanest_closid();
163 		if (cleanest_closid < 0)
164 			return cleanest_closid;
165 		closid = cleanest_closid;
166 	} else {
167 		closid = ffs(closid_free_map);
168 		if (closid == 0)
169 			return -ENOSPC;
170 		closid--;
171 	}
172 	__clear_bit(closid, &closid_free_map);
173 
174 	return closid;
175 }
176 
177 void closid_free(int closid)
178 {
179 	lockdep_assert_held(&rdtgroup_mutex);
180 
181 	__set_bit(closid, &closid_free_map);
182 }
183 
184 /**
185  * closid_allocated - test if provided closid is in use
186  * @closid: closid to be tested
187  *
188  * Return: true if @closid is currently associated with a resource group,
189  * false if @closid is free
190  */
191 bool closid_allocated(unsigned int closid)
192 {
193 	lockdep_assert_held(&rdtgroup_mutex);
194 
195 	return !test_bit(closid, &closid_free_map);
196 }
197 
198 /**
199  * rdtgroup_mode_by_closid - Return mode of resource group with closid
200  * @closid: closid of the resource group
201  *
202  * Each resource group is associated with a @closid. Here the mode
203  * of a resource group can be queried by searching for it using its closid.
204  *
205  * Return: mode as &enum rdtgrp_mode of resource group with closid @closid
206  */
207 enum rdtgrp_mode rdtgroup_mode_by_closid(int closid)
208 {
209 	struct rdtgroup *rdtgrp;
210 
211 	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
212 		if (rdtgrp->closid == closid)
213 			return rdtgrp->mode;
214 	}
215 
216 	return RDT_NUM_MODES;
217 }
218 
219 static const char * const rdt_mode_str[] = {
220 	[RDT_MODE_SHAREABLE]		= "shareable",
221 	[RDT_MODE_EXCLUSIVE]		= "exclusive",
222 	[RDT_MODE_PSEUDO_LOCKSETUP]	= "pseudo-locksetup",
223 	[RDT_MODE_PSEUDO_LOCKED]	= "pseudo-locked",
224 };
225 
226 /**
227  * rdtgroup_mode_str - Return the string representation of mode
228  * @mode: the resource group mode as &enum rdtgroup_mode
229  *
230  * Return: string representation of valid mode, "unknown" otherwise
231  */
232 static const char *rdtgroup_mode_str(enum rdtgrp_mode mode)
233 {
234 	if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES)
235 		return "unknown";
236 
237 	return rdt_mode_str[mode];
238 }
239 
240 /* set uid and gid of rdtgroup dirs and files to that of the creator */
241 static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
242 {
243 	struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
244 				.ia_uid = current_fsuid(),
245 				.ia_gid = current_fsgid(), };
246 
247 	if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
248 	    gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
249 		return 0;
250 
251 	return kernfs_setattr(kn, &iattr);
252 }
253 
254 static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
255 {
256 	struct kernfs_node *kn;
257 	int ret;
258 
259 	kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
260 				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
261 				  0, rft->kf_ops, rft, NULL, NULL);
262 	if (IS_ERR(kn))
263 		return PTR_ERR(kn);
264 
265 	ret = rdtgroup_kn_set_ugid(kn);
266 	if (ret) {
267 		kernfs_remove(kn);
268 		return ret;
269 	}
270 
271 	return 0;
272 }
273 
274 static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
275 {
276 	struct kernfs_open_file *of = m->private;
277 	struct rftype *rft = of->kn->priv;
278 
279 	if (rft->seq_show)
280 		return rft->seq_show(of, m, arg);
281 	return 0;
282 }
283 
284 static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
285 				   size_t nbytes, loff_t off)
286 {
287 	struct rftype *rft = of->kn->priv;
288 
289 	if (rft->write)
290 		return rft->write(of, buf, nbytes, off);
291 
292 	return -EINVAL;
293 }
294 
295 static const struct kernfs_ops rdtgroup_kf_single_ops = {
296 	.atomic_write_len	= PAGE_SIZE,
297 	.write			= rdtgroup_file_write,
298 	.seq_show		= rdtgroup_seqfile_show,
299 };
300 
301 static const struct kernfs_ops kf_mondata_ops = {
302 	.atomic_write_len	= PAGE_SIZE,
303 	.seq_show		= rdtgroup_mondata_show,
304 };
305 
306 static bool is_cpu_list(struct kernfs_open_file *of)
307 {
308 	struct rftype *rft = of->kn->priv;
309 
310 	return rft->flags & RFTYPE_FLAGS_CPUS_LIST;
311 }
312 
313 static int rdtgroup_cpus_show(struct kernfs_open_file *of,
314 			      struct seq_file *s, void *v)
315 {
316 	struct rdtgroup *rdtgrp;
317 	struct cpumask *mask;
318 	int ret = 0;
319 
320 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
321 
322 	if (rdtgrp) {
323 		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
324 			if (!rdtgrp->plr->d) {
325 				rdt_last_cmd_clear();
326 				rdt_last_cmd_puts("Cache domain offline\n");
327 				ret = -ENODEV;
328 			} else {
329 				mask = &rdtgrp->plr->d->hdr.cpu_mask;
330 				seq_printf(s, is_cpu_list(of) ?
331 					   "%*pbl\n" : "%*pb\n",
332 					   cpumask_pr_args(mask));
333 			}
334 		} else {
335 			seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
336 				   cpumask_pr_args(&rdtgrp->cpu_mask));
337 		}
338 	} else {
339 		ret = -ENOENT;
340 	}
341 	rdtgroup_kn_unlock(of->kn);
342 
343 	return ret;
344 }
345 
346 /*
347  * This is safe against resctrl_sched_in() called from __switch_to()
348  * because __switch_to() is executed with interrupts disabled. A local call
349  * from update_closid_rmid() is protected against __switch_to() because
350  * preemption is disabled.
351  */
352 static void update_cpu_closid_rmid(void *info)
353 {
354 	struct rdtgroup *r = info;
355 
356 	if (r) {
357 		this_cpu_write(pqr_state.default_closid, r->closid);
358 		this_cpu_write(pqr_state.default_rmid, r->mon.rmid);
359 	}
360 
361 	/*
362 	 * We cannot unconditionally write the MSR because the current
363 	 * executing task might have its own closid selected. Just reuse
364 	 * the context switch code.
365 	 */
366 	resctrl_sched_in(current);
367 }
368 
369 /*
370  * Update the PQR_ASSOC MSR on all cpus in @cpu_mask.
371  *
372  * Per task closids/rmids must have been set up before calling this function.
373  */
374 static void
375 update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
376 {
377 	on_each_cpu_mask(cpu_mask, update_cpu_closid_rmid, r, 1);
378 }
379 
380 static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
381 			  cpumask_var_t tmpmask)
382 {
383 	struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp;
384 	struct list_head *head;
385 
386 	/* Check whether cpus belong to parent ctrl group */
387 	cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
388 	if (!cpumask_empty(tmpmask)) {
389 		rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n");
390 		return -EINVAL;
391 	}
392 
393 	/* Check whether cpus are dropped from this group */
394 	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
395 	if (!cpumask_empty(tmpmask)) {
396 		/* Give any dropped cpus to parent rdtgroup */
397 		cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
398 		update_closid_rmid(tmpmask, prgrp);
399 	}
400 
401 	/*
402 	 * If we added cpus, remove them from the previous group that owned them
403 	 * and update per-cpu rmid
404 	 */
405 	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
406 	if (!cpumask_empty(tmpmask)) {
407 		head = &prgrp->mon.crdtgrp_list;
408 		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
409 			if (crgrp == rdtgrp)
410 				continue;
411 			cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask,
412 				       tmpmask);
413 		}
414 		update_closid_rmid(tmpmask, rdtgrp);
415 	}
416 
417 	/* Done pushing/pulling - update this group with new mask */
418 	cpumask_copy(&rdtgrp->cpu_mask, newmask);
419 
420 	return 0;
421 }
422 
423 static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
424 {
425 	struct rdtgroup *crgrp;
426 
427 	cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
428 	/* update the child mon group masks as well */
429 	list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list)
430 		cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask);
431 }
432 
433 static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
434 			   cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
435 {
436 	struct rdtgroup *r, *crgrp;
437 	struct list_head *head;
438 
439 	/* Check whether cpus are dropped from this group */
440 	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
441 	if (!cpumask_empty(tmpmask)) {
442 		/* Can't drop from default group */
443 		if (rdtgrp == &rdtgroup_default) {
444 			rdt_last_cmd_puts("Can't drop CPUs from default group\n");
445 			return -EINVAL;
446 		}
447 
448 		/* Give any dropped cpus to rdtgroup_default */
449 		cpumask_or(&rdtgroup_default.cpu_mask,
450 			   &rdtgroup_default.cpu_mask, tmpmask);
451 		update_closid_rmid(tmpmask, &rdtgroup_default);
452 	}
453 
454 	/*
455 	 * If we added cpus, remove them from the previous group and from
456 	 * that group's child groups that owned them,
457 	 * and update per-cpu closid/rmid.
458 	 */
459 	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
460 	if (!cpumask_empty(tmpmask)) {
461 		list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
462 			if (r == rdtgrp)
463 				continue;
464 			cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
465 			if (!cpumask_empty(tmpmask1))
466 				cpumask_rdtgrp_clear(r, tmpmask1);
467 		}
468 		update_closid_rmid(tmpmask, rdtgrp);
469 	}
470 
471 	/* Done pushing/pulling - update this group with new mask */
472 	cpumask_copy(&rdtgrp->cpu_mask, newmask);
473 
474 	/*
475 	 * Clear child mon group masks since there is a new parent mask
476 	 * now and update the rmid for the cpus the child lost.
477 	 */
478 	head = &rdtgrp->mon.crdtgrp_list;
479 	list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
480 		cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask);
481 		update_closid_rmid(tmpmask, rdtgrp);
482 		cpumask_clear(&crgrp->cpu_mask);
483 	}
484 
485 	return 0;
486 }
487 
488 static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
489 				   char *buf, size_t nbytes, loff_t off)
490 {
491 	cpumask_var_t tmpmask, newmask, tmpmask1;
492 	struct rdtgroup *rdtgrp;
493 	int ret;
494 
495 	if (!buf)
496 		return -EINVAL;
497 
498 	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
499 		return -ENOMEM;
500 	if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
501 		free_cpumask_var(tmpmask);
502 		return -ENOMEM;
503 	}
504 	if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
505 		free_cpumask_var(tmpmask);
506 		free_cpumask_var(newmask);
507 		return -ENOMEM;
508 	}
509 
510 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
511 	if (!rdtgrp) {
512 		ret = -ENOENT;
513 		goto unlock;
514 	}
515 
516 	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
517 	    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
518 		ret = -EINVAL;
519 		rdt_last_cmd_puts("Pseudo-locking in progress\n");
520 		goto unlock;
521 	}
522 
523 	if (is_cpu_list(of))
524 		ret = cpulist_parse(buf, newmask);
525 	else
526 		ret = cpumask_parse(buf, newmask);
527 
528 	if (ret) {
529 		rdt_last_cmd_puts("Bad CPU list/mask\n");
530 		goto unlock;
531 	}
532 
533 	/* check that user didn't specify any offline cpus */
534 	cpumask_andnot(tmpmask, newmask, cpu_online_mask);
535 	if (!cpumask_empty(tmpmask)) {
536 		ret = -EINVAL;
537 		rdt_last_cmd_puts("Can only assign online CPUs\n");
538 		goto unlock;
539 	}
540 
541 	if (rdtgrp->type == RDTCTRL_GROUP)
542 		ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
543 	else if (rdtgrp->type == RDTMON_GROUP)
544 		ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
545 	else
546 		ret = -EINVAL;
547 
548 unlock:
549 	rdtgroup_kn_unlock(of->kn);
550 	free_cpumask_var(tmpmask);
551 	free_cpumask_var(newmask);
552 	free_cpumask_var(tmpmask1);
553 
554 	return ret ?: nbytes;
555 }
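
/*
 * Illustrative usage of the "cpus"/"cpus_list" files handled above
 * (assumes resctrl mounted at /sys/fs/resctrl and a group "grp0";
 * names and paths are examples, not from the original sources):
 *
 *   # echo 2-5 > /sys/fs/resctrl/grp0/cpus_list     (CPU list format)
 *   # echo 3c  > /sys/fs/resctrl/grp0/cpus          (hex mask format)
 *
 * Offline CPUs are rejected, and CPUs dropped from a control group are
 * handed back to the default group by cpus_ctrl_write().
 */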
556 
557 /**
558  * rdtgroup_remove - the helper to remove resource group safely
559  * @rdtgrp: resource group to remove
560  *
561  * On resource group creation via a mkdir, an extra kernfs_node reference is
562  * taken to ensure that the rdtgroup structure remains accessible for the
563  * rdtgroup_kn_unlock() calls where it is removed.
564  *
565  * Drop the extra reference here, then free the rdtgroup structure.
566  *
567  * Return: void
568  */
569 static void rdtgroup_remove(struct rdtgroup *rdtgrp)
570 {
571 	kernfs_put(rdtgrp->kn);
572 	kfree(rdtgrp);
573 }
574 
575 static void _update_task_closid_rmid(void *task)
576 {
577 	/*
578 	 * If the task is still current on this CPU, update PQR_ASSOC MSR.
579 	 * Otherwise, the MSR is updated when the task is scheduled in.
580 	 */
581 	if (task == current)
582 		resctrl_sched_in(task);
583 }
584 
585 static void update_task_closid_rmid(struct task_struct *t)
586 {
587 	if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
588 		smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
589 	else
590 		_update_task_closid_rmid(t);
591 }
592 
593 static bool task_in_rdtgroup(struct task_struct *tsk, struct rdtgroup *rdtgrp)
594 {
595 	u32 closid, rmid = rdtgrp->mon.rmid;
596 
597 	if (rdtgrp->type == RDTCTRL_GROUP)
598 		closid = rdtgrp->closid;
599 	else if (rdtgrp->type == RDTMON_GROUP)
600 		closid = rdtgrp->mon.parent->closid;
601 	else
602 		return false;
603 
604 	return resctrl_arch_match_closid(tsk, closid) &&
605 	       resctrl_arch_match_rmid(tsk, closid, rmid);
606 }
607 
608 static int __rdtgroup_move_task(struct task_struct *tsk,
609 				struct rdtgroup *rdtgrp)
610 {
611 	/* If the task is already in rdtgrp, no need to move the task. */
612 	if (task_in_rdtgroup(tsk, rdtgrp))
613 		return 0;
614 
615 	/*
616 	 * Set the task's closid/rmid before the PQR_ASSOC MSR can be
617 	 * updated with them.
618 	 *
619 	 * For ctrl_mon groups, move both closid and rmid.
620 	 * For monitor groups, tasks can only be moved from
621 	 * their parent CTRL group.
622 	 */
623 	if (rdtgrp->type == RDTMON_GROUP &&
624 	    !resctrl_arch_match_closid(tsk, rdtgrp->mon.parent->closid)) {
625 		rdt_last_cmd_puts("Can't move task to different control group\n");
626 		return -EINVAL;
627 	}
628 
629 	if (rdtgrp->type == RDTMON_GROUP)
630 		resctrl_arch_set_closid_rmid(tsk, rdtgrp->mon.parent->closid,
631 					     rdtgrp->mon.rmid);
632 	else
633 		resctrl_arch_set_closid_rmid(tsk, rdtgrp->closid,
634 					     rdtgrp->mon.rmid);
635 
636 	/*
637 	 * Ensure the task's closid and rmid are written before determining
638 	 * whether the task is current, which decides if it will be interrupted.
639 	 * This pairs with the full barrier between the rq->curr update and
640 	 * resctrl_sched_in() during context switch.
641 	 */
642 	smp_mb();
643 
644 	/*
645 	 * By now, the task's closid and rmid are set. If the task is current
646 	 * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
647 	 * group go into effect. If the task is not current, the MSR will be
648 	 * updated when the task is scheduled in.
649 	 */
650 	update_task_closid_rmid(tsk);
651 
652 	return 0;
653 }
654 
655 static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
656 {
657 	return (resctrl_arch_alloc_capable() && (r->type == RDTCTRL_GROUP) &&
658 		resctrl_arch_match_closid(t, r->closid));
659 }
660 
661 static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
662 {
663 	return (resctrl_arch_mon_capable() && (r->type == RDTMON_GROUP) &&
664 		resctrl_arch_match_rmid(t, r->mon.parent->closid,
665 					r->mon.rmid));
666 }
667 
668 /**
669  * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group
670  * @r: Resource group
671  *
672  * Return: 1 if tasks have been assigned to @r, 0 otherwise
673  */
674 int rdtgroup_tasks_assigned(struct rdtgroup *r)
675 {
676 	struct task_struct *p, *t;
677 	int ret = 0;
678 
679 	lockdep_assert_held(&rdtgroup_mutex);
680 
681 	rcu_read_lock();
682 	for_each_process_thread(p, t) {
683 		if (is_closid_match(t, r) || is_rmid_match(t, r)) {
684 			ret = 1;
685 			break;
686 		}
687 	}
688 	rcu_read_unlock();
689 
690 	return ret;
691 }
692 
693 static int rdtgroup_task_write_permission(struct task_struct *task,
694 					  struct kernfs_open_file *of)
695 {
696 	const struct cred *tcred = get_task_cred(task);
697 	const struct cred *cred = current_cred();
698 	int ret = 0;
699 
700 	/*
701 	 * Even if we're attaching all tasks in the thread group, we only
702 	 * need to check permissions on one of them.
703 	 */
704 	if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
705 	    !uid_eq(cred->euid, tcred->uid) &&
706 	    !uid_eq(cred->euid, tcred->suid)) {
707 		rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
708 		ret = -EPERM;
709 	}
710 
711 	put_cred(tcred);
712 	return ret;
713 }
714 
715 static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
716 			      struct kernfs_open_file *of)
717 {
718 	struct task_struct *tsk;
719 	int ret;
720 
721 	rcu_read_lock();
722 	if (pid) {
723 		tsk = find_task_by_vpid(pid);
724 		if (!tsk) {
725 			rcu_read_unlock();
726 			rdt_last_cmd_printf("No task %d\n", pid);
727 			return -ESRCH;
728 		}
729 	} else {
730 		tsk = current;
731 	}
732 
733 	get_task_struct(tsk);
734 	rcu_read_unlock();
735 
736 	ret = rdtgroup_task_write_permission(tsk, of);
737 	if (!ret)
738 		ret = __rdtgroup_move_task(tsk, rdtgrp);
739 
740 	put_task_struct(tsk);
741 	return ret;
742 }
743 
744 static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
745 				    char *buf, size_t nbytes, loff_t off)
746 {
747 	struct rdtgroup *rdtgrp;
748 	char *pid_str;
749 	int ret = 0;
750 	pid_t pid;
751 
752 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
753 	if (!rdtgrp) {
754 		rdtgroup_kn_unlock(of->kn);
755 		return -ENOENT;
756 	}
757 	rdt_last_cmd_clear();
758 
759 	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
760 	    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
761 		ret = -EINVAL;
762 		rdt_last_cmd_puts("Pseudo-locking in progress\n");
763 		goto unlock;
764 	}
765 
766 	while (buf && buf[0] != '\0' && buf[0] != '\n') {
767 		pid_str = strim(strsep(&buf, ","));
768 
769 		if (kstrtoint(pid_str, 0, &pid)) {
770 			rdt_last_cmd_printf("Task list parsing error pid %s\n", pid_str);
771 			ret = -EINVAL;
772 			break;
773 		}
774 
775 		if (pid < 0) {
776 			rdt_last_cmd_printf("Invalid pid %d\n", pid);
777 			ret = -EINVAL;
778 			break;
779 		}
780 
781 		ret = rdtgroup_move_task(pid, rdtgrp, of);
782 		if (ret) {
783 			rdt_last_cmd_printf("Error while processing task %d\n", pid);
784 			break;
785 		}
786 	}
787 
788 unlock:
789 	rdtgroup_kn_unlock(of->kn);
790 
791 	return ret ?: nbytes;
792 }
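
/*
 * Illustrative usage of the "tasks" file parsed above (pids and paths
 * are examples, not from the original sources):
 *
 *   # echo 1234 > /sys/fs/resctrl/grp0/tasks
 *   # echo "1234,5678" > /sys/fs/resctrl/grp0/tasks
 *
 * A pid of 0 moves the writing task itself, matching the pid == 0 case
 * in rdtgroup_move_task().
 */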
793 
794 static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
795 {
796 	struct task_struct *p, *t;
797 	pid_t pid;
798 
799 	rcu_read_lock();
800 	for_each_process_thread(p, t) {
801 		if (is_closid_match(t, r) || is_rmid_match(t, r)) {
802 			pid = task_pid_vnr(t);
803 			if (pid)
804 				seq_printf(s, "%d\n", pid);
805 		}
806 	}
807 	rcu_read_unlock();
808 }
809 
810 static int rdtgroup_tasks_show(struct kernfs_open_file *of,
811 			       struct seq_file *s, void *v)
812 {
813 	struct rdtgroup *rdtgrp;
814 	int ret = 0;
815 
816 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
817 	if (rdtgrp)
818 		show_rdt_tasks(rdtgrp, s);
819 	else
820 		ret = -ENOENT;
821 	rdtgroup_kn_unlock(of->kn);
822 
823 	return ret;
824 }
825 
826 static int rdtgroup_closid_show(struct kernfs_open_file *of,
827 				struct seq_file *s, void *v)
828 {
829 	struct rdtgroup *rdtgrp;
830 	int ret = 0;
831 
832 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
833 	if (rdtgrp)
834 		seq_printf(s, "%u\n", rdtgrp->closid);
835 	else
836 		ret = -ENOENT;
837 	rdtgroup_kn_unlock(of->kn);
838 
839 	return ret;
840 }
841 
842 static int rdtgroup_rmid_show(struct kernfs_open_file *of,
843 			      struct seq_file *s, void *v)
844 {
845 	struct rdtgroup *rdtgrp;
846 	int ret = 0;
847 
848 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
849 	if (rdtgrp)
850 		seq_printf(s, "%u\n", rdtgrp->mon.rmid);
851 	else
852 		ret = -ENOENT;
853 	rdtgroup_kn_unlock(of->kn);
854 
855 	return ret;
856 }
857 
858 #ifdef CONFIG_PROC_CPU_RESCTRL
859 
860 /*
861  * A task can only be part of one resctrl control group and of one monitor
862  * group, which is associated with that control group.
863  *
864  * 1)   res:
865  *      mon:
866  *
867  *    resctrl is not available.
868  *
869  * 2)   res:/
870  *      mon:
871  *
872  *    Task is part of the root resctrl control group, and it is not associated
873  *    with any monitor group.
874  *
875  * 3)  res:/
876  *     mon:mon0
877  *
878  *    Task is part of the root resctrl control group and monitor group mon0.
879  *
880  * 4)  res:group0
881  *     mon:
882  *
883  *    Task is part of resctrl control group group0, and it is not associated
884  *    with any monitor group.
885  *
886  * 5) res:group0
887  *    mon:mon1
888  *
889  *    Task is part of resctrl control group group0 and monitor group mon1.
890  */
891 int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns,
892 		      struct pid *pid, struct task_struct *tsk)
893 {
894 	struct rdtgroup *rdtg;
895 	int ret = 0;
896 
897 	mutex_lock(&rdtgroup_mutex);
898 
899 	/* Return empty if resctrl has not been mounted. */
900 	if (!resctrl_mounted) {
901 		seq_puts(s, "res:\nmon:\n");
902 		goto unlock;
903 	}
904 
905 	list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) {
906 		struct rdtgroup *crg;
907 
908 		/*
909 		 * Task information is only relevant for shareable
910 		 * and exclusive groups.
911 		 */
912 		if (rdtg->mode != RDT_MODE_SHAREABLE &&
913 		    rdtg->mode != RDT_MODE_EXCLUSIVE)
914 			continue;
915 
916 		if (!resctrl_arch_match_closid(tsk, rdtg->closid))
917 			continue;
918 
919 		seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "",
920 			   rdtg->kn->name);
921 		seq_puts(s, "mon:");
922 		list_for_each_entry(crg, &rdtg->mon.crdtgrp_list,
923 				    mon.crdtgrp_list) {
924 			if (!resctrl_arch_match_rmid(tsk, crg->mon.parent->closid,
925 						     crg->mon.rmid))
926 				continue;
927 			seq_printf(s, "%s", crg->kn->name);
928 			break;
929 		}
930 		seq_putc(s, '\n');
931 		goto unlock;
932 	}
933 	/*
934 	 * The above search should succeed. Otherwise return
935 	 * with an error.
936 	 */
937 	ret = -ENOENT;
938 unlock:
939 	mutex_unlock(&rdtgroup_mutex);
940 
941 	return ret;
942 }
943 #endif
944 
945 static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
946 				    struct seq_file *seq, void *v)
947 {
948 	int len;
949 
950 	mutex_lock(&rdtgroup_mutex);
951 	len = seq_buf_used(&last_cmd_status);
952 	if (len)
953 		seq_printf(seq, "%.*s", len, last_cmd_status_buf);
954 	else
955 		seq_puts(seq, "ok\n");
956 	mutex_unlock(&rdtgroup_mutex);
957 	return 0;
958 }
959 
960 static int rdt_num_closids_show(struct kernfs_open_file *of,
961 				struct seq_file *seq, void *v)
962 {
963 	struct resctrl_schema *s = of->kn->parent->priv;
964 
965 	seq_printf(seq, "%u\n", s->num_closid);
966 	return 0;
967 }
968 
969 static int rdt_default_ctrl_show(struct kernfs_open_file *of,
970 			     struct seq_file *seq, void *v)
971 {
972 	struct resctrl_schema *s = of->kn->parent->priv;
973 	struct rdt_resource *r = s->res;
974 
975 	seq_printf(seq, "%x\n", r->default_ctrl);
976 	return 0;
977 }
978 
979 static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
980 			     struct seq_file *seq, void *v)
981 {
982 	struct resctrl_schema *s = of->kn->parent->priv;
983 	struct rdt_resource *r = s->res;
984 
985 	seq_printf(seq, "%u\n", r->cache.min_cbm_bits);
986 	return 0;
987 }
988 
989 static int rdt_shareable_bits_show(struct kernfs_open_file *of,
990 				   struct seq_file *seq, void *v)
991 {
992 	struct resctrl_schema *s = of->kn->parent->priv;
993 	struct rdt_resource *r = s->res;
994 
995 	seq_printf(seq, "%x\n", r->cache.shareable_bits);
996 	return 0;
997 }
998 
999 /*
1000  * rdt_bit_usage_show - Display current usage of resources
1001  *
1002  * A domain is a shared resource that can now be allocated differently. Here
1003  * we display the current regions of the domain as an annotated bitmask.
1004  * For each domain of this resource its allocation bitmask
1005  * is annotated as below to indicate the current usage of the corresponding bit:
1006  *   0 - currently unused
1007  *   X - currently available for sharing and used by software and hardware
1008  *   H - currently used by hardware only but available for software use
1009  *   S - currently used and shareable by software only
1010  *   E - currently used exclusively by one resource group
1011  *   P - currently pseudo-locked by one resource group
1012  */
1013 static int rdt_bit_usage_show(struct kernfs_open_file *of,
1014 			      struct seq_file *seq, void *v)
1015 {
1016 	struct resctrl_schema *s = of->kn->parent->priv;
1017 	/*
1018 	 * Use unsigned long even though only 32 bits are used to ensure
1019 	 * test_bit() is used safely.
1020 	 */
1021 	unsigned long sw_shareable = 0, hw_shareable = 0;
1022 	unsigned long exclusive = 0, pseudo_locked = 0;
1023 	struct rdt_resource *r = s->res;
1024 	struct rdt_ctrl_domain *dom;
1025 	int i, hwb, swb, excl, psl;
1026 	enum rdtgrp_mode mode;
1027 	bool sep = false;
1028 	u32 ctrl_val;
1029 
1030 	cpus_read_lock();
1031 	mutex_lock(&rdtgroup_mutex);
1032 	hw_shareable = r->cache.shareable_bits;
1033 	list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
1034 		if (sep)
1035 			seq_putc(seq, ';');
1036 		sw_shareable = 0;
1037 		exclusive = 0;
1038 		seq_printf(seq, "%d=", dom->hdr.id);
1039 		for (i = 0; i < closids_supported(); i++) {
1040 			if (!closid_allocated(i))
1041 				continue;
1042 			ctrl_val = resctrl_arch_get_config(r, dom, i,
1043 							   s->conf_type);
1044 			mode = rdtgroup_mode_by_closid(i);
1045 			switch (mode) {
1046 			case RDT_MODE_SHAREABLE:
1047 				sw_shareable |= ctrl_val;
1048 				break;
1049 			case RDT_MODE_EXCLUSIVE:
1050 				exclusive |= ctrl_val;
1051 				break;
1052 			case RDT_MODE_PSEUDO_LOCKSETUP:
1053 			/*
1054 			 * RDT_MODE_PSEUDO_LOCKSETUP is possible
1055 			 * here but not included since the CBM
1056 			 * associated with this CLOSID in this mode
1057 			 * is not initialized and no task or cpu can be
1058 			 * assigned this CLOSID.
1059 			 */
1060 				break;
1061 			case RDT_MODE_PSEUDO_LOCKED:
1062 			case RDT_NUM_MODES:
1063 				WARN(1,
1064 				     "invalid mode for closid %d\n", i);
1065 				break;
1066 			}
1067 		}
1068 		for (i = r->cache.cbm_len - 1; i >= 0; i--) {
1069 			pseudo_locked = dom->plr ? dom->plr->cbm : 0;
1070 			hwb = test_bit(i, &hw_shareable);
1071 			swb = test_bit(i, &sw_shareable);
1072 			excl = test_bit(i, &exclusive);
1073 			psl = test_bit(i, &pseudo_locked);
1074 			if (hwb && swb)
1075 				seq_putc(seq, 'X');
1076 			else if (hwb && !swb)
1077 				seq_putc(seq, 'H');
1078 			else if (!hwb && swb)
1079 				seq_putc(seq, 'S');
1080 			else if (excl)
1081 				seq_putc(seq, 'E');
1082 			else if (psl)
1083 				seq_putc(seq, 'P');
1084 			else /* Unused bits remain */
1085 				seq_putc(seq, '0');
1086 		}
1087 		sep = true;
1088 	}
1089 	seq_putc(seq, '\n');
1090 	mutex_unlock(&rdtgroup_mutex);
1091 	cpus_read_unlock();
1092 	return 0;
1093 }
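
/*
 * Illustrative "bit_usage" output for a hypothetical two-domain L3 with
 * a 20-bit CBM, following the legend documented above (values are
 * examples, not from the original sources):
 *
 *   # cat /sys/fs/resctrl/info/L3/bit_usage
 *   0=SSSSSSSSSSSSSSSSSSSS;1=SSSSSSSSSSSSSSSSEEXX
 *
 * Domain 1 here has two exclusively owned bits and two bits shared by
 * hardware and software.
 */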
1094 
1095 static int rdt_min_bw_show(struct kernfs_open_file *of,
1096 			     struct seq_file *seq, void *v)
1097 {
1098 	struct resctrl_schema *s = of->kn->parent->priv;
1099 	struct rdt_resource *r = s->res;
1100 
1101 	seq_printf(seq, "%u\n", r->membw.min_bw);
1102 	return 0;
1103 }
1104 
1105 static int rdt_num_rmids_show(struct kernfs_open_file *of,
1106 			      struct seq_file *seq, void *v)
1107 {
1108 	struct rdt_resource *r = of->kn->parent->priv;
1109 
1110 	seq_printf(seq, "%d\n", r->num_rmid);
1111 
1112 	return 0;
1113 }
1114 
1115 static int rdt_mon_features_show(struct kernfs_open_file *of,
1116 				 struct seq_file *seq, void *v)
1117 {
1118 	struct rdt_resource *r = of->kn->parent->priv;
1119 	struct mon_evt *mevt;
1120 
1121 	list_for_each_entry(mevt, &r->evt_list, list) {
1122 		seq_printf(seq, "%s\n", mevt->name);
1123 		if (mevt->configurable)
1124 			seq_printf(seq, "%s_config\n", mevt->name);
1125 	}
1126 
1127 	return 0;
1128 }
1129 
1130 static int rdt_bw_gran_show(struct kernfs_open_file *of,
1131 			     struct seq_file *seq, void *v)
1132 {
1133 	struct resctrl_schema *s = of->kn->parent->priv;
1134 	struct rdt_resource *r = s->res;
1135 
1136 	seq_printf(seq, "%u\n", r->membw.bw_gran);
1137 	return 0;
1138 }
1139 
1140 static int rdt_delay_linear_show(struct kernfs_open_file *of,
1141 			     struct seq_file *seq, void *v)
1142 {
1143 	struct resctrl_schema *s = of->kn->parent->priv;
1144 	struct rdt_resource *r = s->res;
1145 
1146 	seq_printf(seq, "%u\n", r->membw.delay_linear);
1147 	return 0;
1148 }
1149 
1150 static int max_threshold_occ_show(struct kernfs_open_file *of,
1151 				  struct seq_file *seq, void *v)
1152 {
1153 	seq_printf(seq, "%u\n", resctrl_rmid_realloc_threshold);
1154 
1155 	return 0;
1156 }
1157 
1158 static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
1159 					 struct seq_file *seq, void *v)
1160 {
1161 	struct resctrl_schema *s = of->kn->parent->priv;
1162 	struct rdt_resource *r = s->res;
1163 
1164 	if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD)
1165 		seq_puts(seq, "per-thread\n");
1166 	else
1167 		seq_puts(seq, "max\n");
1168 
1169 	return 0;
1170 }
1171 
1172 static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
1173 				       char *buf, size_t nbytes, loff_t off)
1174 {
1175 	unsigned int bytes;
1176 	int ret;
1177 
1178 	ret = kstrtouint(buf, 0, &bytes);
1179 	if (ret)
1180 		return ret;
1181 
1182 	if (bytes > resctrl_rmid_realloc_limit)
1183 		return -EINVAL;
1184 
1185 	resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(bytes);
1186 
1187 	return nbytes;
1188 }
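
/*
 * Illustrative usage of "max_threshold_occupancy" (path is an example,
 * assuming an L3_MON capable system; not from the original sources):
 *
 *   # echo 131072 > /sys/fs/resctrl/info/L3_MON/max_threshold_occupancy
 *
 * Values above resctrl_rmid_realloc_limit are rejected; accepted values
 * are rounded by resctrl_arch_round_mon_val() before use.
 */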
1189 
1190 /*
1191  * rdtgroup_mode_show - Display mode of this resource group
1192  */
1193 static int rdtgroup_mode_show(struct kernfs_open_file *of,
1194 			      struct seq_file *s, void *v)
1195 {
1196 	struct rdtgroup *rdtgrp;
1197 
1198 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1199 	if (!rdtgrp) {
1200 		rdtgroup_kn_unlock(of->kn);
1201 		return -ENOENT;
1202 	}
1203 
1204 	seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode));
1205 
1206 	rdtgroup_kn_unlock(of->kn);
1207 	return 0;
1208 }
1209 
1210 static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type)
1211 {
1212 	switch (my_type) {
1213 	case CDP_CODE:
1214 		return CDP_DATA;
1215 	case CDP_DATA:
1216 		return CDP_CODE;
1217 	default:
1218 	case CDP_NONE:
1219 		return CDP_NONE;
1220 	}
1221 }
1222 
1223 static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of,
1224 					struct seq_file *seq, void *v)
1225 {
1226 	struct resctrl_schema *s = of->kn->parent->priv;
1227 	struct rdt_resource *r = s->res;
1228 
1229 	seq_printf(seq, "%u\n", r->cache.arch_has_sparse_bitmasks);
1230 
1231 	return 0;
1232 }
1233 
1234 /**
1235  * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
1236  * @r: Resource to which domain instance @d belongs.
1237  * @d: The domain instance for which @closid is being tested.
1238  * @cbm: Capacity bitmask being tested.
1239  * @closid: Intended closid for @cbm.
1240  * @type: CDP type of @r.
1241  * @exclusive: Only check if overlaps with exclusive resource groups
1242  *
1243  * Checks if provided @cbm intended to be used for @closid on domain
1244  * @d overlaps with any other closids or other hardware usage associated
1245  * with this domain. If @exclusive is true then only overlaps with
1246  * resource groups in exclusive mode will be considered. If @exclusive
1247  * is false then overlaps with any resource group or hardware entities
1248  * will be considered.
1249  *
1250  * @cbm is unsigned long, even if only 32 bits are used, to make the
1251  * bitmap functions work correctly.
1252  *
1253  * Return: false if CBM does not overlap, true if it does.
1254  */
1255 static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_ctrl_domain *d,
1256 				    unsigned long cbm, int closid,
1257 				    enum resctrl_conf_type type, bool exclusive)
1258 {
1259 	enum rdtgrp_mode mode;
1260 	unsigned long ctrl_b;
1261 	int i;
1262 
1263 	/* Check for any overlap with regions used by hardware directly */
1264 	if (!exclusive) {
1265 		ctrl_b = r->cache.shareable_bits;
1266 		if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
1267 			return true;
1268 	}
1269 
1270 	/* Check for overlap with other resource groups */
1271 	for (i = 0; i < closids_supported(); i++) {
1272 		ctrl_b = resctrl_arch_get_config(r, d, i, type);
1273 		mode = rdtgroup_mode_by_closid(i);
1274 		if (closid_allocated(i) && i != closid &&
1275 		    mode != RDT_MODE_PSEUDO_LOCKSETUP) {
1276 			if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
1277 				if (exclusive) {
1278 					if (mode == RDT_MODE_EXCLUSIVE)
1279 						return true;
1280 					continue;
1281 				}
1282 				return true;
1283 			}
1284 		}
1285 	}
1286 
1287 	return false;
1288 }
1289 
1290 /**
1291  * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
1292  * @s: Schema for the resource to which domain instance @d belongs.
1293  * @d: The domain instance for which @closid is being tested.
1294  * @cbm: Capacity bitmask being tested.
1295  * @closid: Intended closid for @cbm.
1296  * @exclusive: Only check if overlaps with exclusive resource groups
1297  *
1298  * Resources that can be allocated using a CBM can use the CBM to control
1299  * the overlap of these allocations. rdtgroup_cbm_overlaps() is the test
1300  * for overlap. Overlap test is not limited to the specific resource for
1301  * which the CBM is intended though - when dealing with CDP resources that
1302  * share the underlying hardware the overlap check should be performed on
1303  * the CDP resource sharing the hardware also.
1304  *
1305  * Refer to description of __rdtgroup_cbm_overlaps() for the details of the
1306  * overlap test.
1307  *
1308  * Return: true if CBM overlap detected, false if there is no overlap
1309  */
1310 bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d,
1311 			   unsigned long cbm, int closid, bool exclusive)
1312 {
1313 	enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
1314 	struct rdt_resource *r = s->res;
1315 
1316 	if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, s->conf_type,
1317 				    exclusive))
1318 		return true;
1319 
1320 	if (!resctrl_arch_get_cdp_enabled(r->rid))
1321 		return false;
1322 	return  __rdtgroup_cbm_overlaps(r, d, cbm, closid, peer_type, exclusive);
1323 }
1324 
1325 /**
1326  * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
1327  * @rdtgrp: Resource group identified through its closid.
1328  *
1329  * An exclusive resource group implies that there should be no sharing of
1330  * its allocated resources. At the time this group is considered to be
1331  * exclusive this test can determine if its current schemata supports this
1332  * setting by testing for overlap with all other resource groups.
1333  *
1334  * Return: true if resource group can be exclusive, false if there is overlap
1335  * with allocations of other resource groups and thus this resource group
1336  * cannot be exclusive.
1337  */
1338 static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
1339 {
1340 	int closid = rdtgrp->closid;
1341 	struct rdt_ctrl_domain *d;
1342 	struct resctrl_schema *s;
1343 	struct rdt_resource *r;
1344 	bool has_cache = false;
1345 	u32 ctrl;
1346 
1347 	/* Walking r->domains, ensure it can't race with cpuhp */
1348 	lockdep_assert_cpus_held();
1349 
1350 	list_for_each_entry(s, &resctrl_schema_all, list) {
1351 		r = s->res;
1352 		if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)
1353 			continue;
1354 		has_cache = true;
1355 		list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
1356 			ctrl = resctrl_arch_get_config(r, d, closid,
1357 						       s->conf_type);
1358 			if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) {
1359 				rdt_last_cmd_puts("Schemata overlaps\n");
1360 				return false;
1361 			}
1362 		}
1363 	}
1364 
1365 	if (!has_cache) {
1366 		rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n");
1367 		return false;
1368 	}
1369 
1370 	return true;
1371 }
1372 
1373 /*
1374  * rdtgroup_mode_write - Modify the resource group's mode
1375  */
1376 static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
1377 				   char *buf, size_t nbytes, loff_t off)
1378 {
1379 	struct rdtgroup *rdtgrp;
1380 	enum rdtgrp_mode mode;
1381 	int ret = 0;
1382 
1383 	/* Valid input requires a trailing newline */
1384 	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1385 		return -EINVAL;
1386 	buf[nbytes - 1] = '\0';
1387 
1388 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1389 	if (!rdtgrp) {
1390 		rdtgroup_kn_unlock(of->kn);
1391 		return -ENOENT;
1392 	}
1393 
1394 	rdt_last_cmd_clear();
1395 
1396 	mode = rdtgrp->mode;
1397 
1398 	if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) ||
1399 	    (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) ||
1400 	    (!strcmp(buf, "pseudo-locksetup") &&
1401 	     mode == RDT_MODE_PSEUDO_LOCKSETUP) ||
1402 	    (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED))
1403 		goto out;
1404 
1405 	if (mode == RDT_MODE_PSEUDO_LOCKED) {
1406 		rdt_last_cmd_puts("Cannot change pseudo-locked group\n");
1407 		ret = -EINVAL;
1408 		goto out;
1409 	}
1410 
1411 	if (!strcmp(buf, "shareable")) {
1412 		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1413 			ret = rdtgroup_locksetup_exit(rdtgrp);
1414 			if (ret)
1415 				goto out;
1416 		}
1417 		rdtgrp->mode = RDT_MODE_SHAREABLE;
1418 	} else if (!strcmp(buf, "exclusive")) {
1419 		if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
1420 			ret = -EINVAL;
1421 			goto out;
1422 		}
1423 		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1424 			ret = rdtgroup_locksetup_exit(rdtgrp);
1425 			if (ret)
1426 				goto out;
1427 		}
1428 		rdtgrp->mode = RDT_MODE_EXCLUSIVE;
1429 	} else if (!strcmp(buf, "pseudo-locksetup")) {
1430 		ret = rdtgroup_locksetup_enter(rdtgrp);
1431 		if (ret)
1432 			goto out;
1433 		rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP;
1434 	} else {
1435 		rdt_last_cmd_puts("Unknown or unsupported mode\n");
1436 		ret = -EINVAL;
1437 	}
1438 
1439 out:
1440 	rdtgroup_kn_unlock(of->kn);
1441 	return ret ?: nbytes;
1442 }
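
/*
 * Illustrative usage of the "mode" file handled above (group name is an
 * example, not from the original sources):
 *
 *   # cat /sys/fs/resctrl/grp0/mode
 *   shareable
 *   # echo exclusive > /sys/fs/resctrl/grp0/mode
 *
 * The switch to "exclusive" fails with -EINVAL if the group's schemata
 * overlaps another group, per rdtgroup_mode_test_exclusive().
 */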
1443 
1444 /**
1445  * rdtgroup_cbm_to_size - Translate CBM to size in bytes
1446  * @r: RDT resource to which @d belongs.
1447  * @d: RDT domain instance.
1448  * @cbm: bitmask for which the size should be computed.
1449  *
1450  * The bitmask provided associated with the RDT domain instance @d will be
1451  * translated into how many bytes it represents. The size in bytes is
1452  * computed by first dividing the total cache size by the CBM length to
1453  * determine how many bytes each bit in the bitmask represents. The result
1454  * is multiplied with the number of bits set in the bitmask.
1455  *
1456  * @cbm is unsigned long, even if only 32 bits are used, to make the
1457  * bitmap functions work correctly.
1458  */
1459 unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
1460 				  struct rdt_ctrl_domain *d, unsigned long cbm)
1461 {
1462 	unsigned int size = 0;
1463 	struct cacheinfo *ci;
1464 	int num_b;
1465 
1466 	if (WARN_ON_ONCE(r->ctrl_scope != RESCTRL_L2_CACHE && r->ctrl_scope != RESCTRL_L3_CACHE))
1467 		return size;
1468 
1469 	num_b = bitmap_weight(&cbm, r->cache.cbm_len);
1470 	ci = get_cpu_cacheinfo_level(cpumask_any(&d->hdr.cpu_mask), r->ctrl_scope);
1471 	if (ci)
1472 		size = ci->size / r->cache.cbm_len * num_b;
1473 
1474 	return size;
1475 }
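
/*
 * Worked example (illustrative numbers, not from the original sources):
 * for a 32 MB (33554432 byte) L3 cache with a 20-bit CBM, each bit
 * covers 33554432 / 20 = 1677721 bytes, so a CBM of 0xf (4 bits set)
 * is reported as 4 * 1677721 = 6710884 bytes in the "size" file.
 */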
1476 
1477 /*
1478  * rdtgroup_size_show - Display size in bytes of allocated regions
1479  *
1480  * The "size" file mirrors the layout of the "schemata" file, printing the
1481  * size in bytes of each region instead of the capacity bitmask.
1482  */
1483 static int rdtgroup_size_show(struct kernfs_open_file *of,
1484 			      struct seq_file *s, void *v)
1485 {
1486 	struct resctrl_schema *schema;
1487 	enum resctrl_conf_type type;
1488 	struct rdt_ctrl_domain *d;
1489 	struct rdtgroup *rdtgrp;
1490 	struct rdt_resource *r;
1491 	unsigned int size;
1492 	int ret = 0;
1493 	u32 closid;
1494 	bool sep;
1495 	u32 ctrl;
1496 
1497 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1498 	if (!rdtgrp) {
1499 		rdtgroup_kn_unlock(of->kn);
1500 		return -ENOENT;
1501 	}
1502 
1503 	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
1504 		if (!rdtgrp->plr->d) {
1505 			rdt_last_cmd_clear();
1506 			rdt_last_cmd_puts("Cache domain offline\n");
1507 			ret = -ENODEV;
1508 		} else {
1509 			seq_printf(s, "%*s:", max_name_width,
1510 				   rdtgrp->plr->s->name);
1511 			size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res,
1512 						    rdtgrp->plr->d,
1513 						    rdtgrp->plr->cbm);
1514 			seq_printf(s, "%d=%u\n", rdtgrp->plr->d->hdr.id, size);
1515 		}
1516 		goto out;
1517 	}
1518 
1519 	closid = rdtgrp->closid;
1520 
1521 	list_for_each_entry(schema, &resctrl_schema_all, list) {
1522 		r = schema->res;
1523 		type = schema->conf_type;
1524 		sep = false;
1525 		seq_printf(s, "%*s:", max_name_width, schema->name);
1526 		list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
1527 			if (sep)
1528 				seq_putc(s, ';');
1529 			if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1530 				size = 0;
1531 			} else {
1532 				if (is_mba_sc(r))
1533 					ctrl = d->mbps_val[closid];
1534 				else
1535 					ctrl = resctrl_arch_get_config(r, d,
1536 								       closid,
1537 								       type);
1538 				if (r->rid == RDT_RESOURCE_MBA ||
1539 				    r->rid == RDT_RESOURCE_SMBA)
1540 					size = ctrl;
1541 				else
1542 					size = rdtgroup_cbm_to_size(r, d, ctrl);
1543 			}
1544 			seq_printf(s, "%d=%u", d->hdr.id, size);
1545 			sep = true;
1546 		}
1547 		seq_putc(s, '\n');
1548 	}
1549 
1550 out:
1551 	rdtgroup_kn_unlock(of->kn);
1552 
1553 	return ret;
1554 }
1555 
1556 struct mon_config_info {
1557 	u32 evtid;
1558 	u32 mon_config;
1559 };
1560 
1561 #define INVALID_CONFIG_INDEX   UINT_MAX
1562 
1563 /**
1564  * mon_event_config_index_get - get the hardware index for the
1565  *                              configurable event
1566  * @evtid: event id.
1567  *
1568  * Return: 0 for evtid == QOS_L3_MBM_TOTAL_EVENT_ID
1569  *         1 for evtid == QOS_L3_MBM_LOCAL_EVENT_ID
1570  *         INVALID_CONFIG_INDEX for invalid evtid
1571  */
1572 static inline unsigned int mon_event_config_index_get(u32 evtid)
1573 {
1574 	switch (evtid) {
1575 	case QOS_L3_MBM_TOTAL_EVENT_ID:
1576 		return 0;
1577 	case QOS_L3_MBM_LOCAL_EVENT_ID:
1578 		return 1;
1579 	default:
1580 		/* Should never reach here */
1581 		return INVALID_CONFIG_INDEX;
1582 	}
1583 }
1584 
1585 static void mon_event_config_read(void *info)
1586 {
1587 	struct mon_config_info *mon_info = info;
1588 	unsigned int index;
1589 	u64 msrval;
1590 
1591 	index = mon_event_config_index_get(mon_info->evtid);
1592 	if (index == INVALID_CONFIG_INDEX) {
1593 		pr_warn_once("Invalid event id %d\n", mon_info->evtid);
1594 		return;
1595 	}
1596 	rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval);
1597 
1598 	/* Report only the valid event configuration bits */
1599 	mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS;
1600 }
1601 
1602 static void mondata_config_read(struct rdt_mon_domain *d, struct mon_config_info *mon_info)
1603 {
1604 	smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_read, mon_info, 1);
1605 }
1606 
1607 static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
1608 {
1609 	struct mon_config_info mon_info;
1610 	struct rdt_mon_domain *dom;
1611 	bool sep = false;
1612 
1613 	cpus_read_lock();
1614 	mutex_lock(&rdtgroup_mutex);
1615 
1616 	list_for_each_entry(dom, &r->mon_domains, hdr.list) {
1617 		if (sep)
1618 			seq_puts(s, ";");
1619 
1620 		memset(&mon_info, 0, sizeof(struct mon_config_info));
1621 		mon_info.evtid = evtid;
1622 		mondata_config_read(dom, &mon_info);
1623 
1624 		seq_printf(s, "%d=0x%02x", dom->hdr.id, mon_info.mon_config);
1625 		sep = true;
1626 	}
1627 	seq_puts(s, "\n");
1628 
1629 	mutex_unlock(&rdtgroup_mutex);
1630 	cpus_read_unlock();
1631 
1632 	return 0;
1633 }
1634 
1635 static int mbm_total_bytes_config_show(struct kernfs_open_file *of,
1636 				       struct seq_file *seq, void *v)
1637 {
1638 	struct rdt_resource *r = of->kn->parent->priv;
1639 
1640 	mbm_config_show(seq, r, QOS_L3_MBM_TOTAL_EVENT_ID);
1641 
1642 	return 0;
1643 }
1644 
1645 static int mbm_local_bytes_config_show(struct kernfs_open_file *of,
1646 				       struct seq_file *seq, void *v)
1647 {
1648 	struct rdt_resource *r = of->kn->parent->priv;
1649 
1650 	mbm_config_show(seq, r, QOS_L3_MBM_LOCAL_EVENT_ID);
1651 
1652 	return 0;
1653 }
1654 
1655 static void mon_event_config_write(void *info)
1656 {
1657 	struct mon_config_info *mon_info = info;
1658 	unsigned int index;
1659 
1660 	index = mon_event_config_index_get(mon_info->evtid);
1661 	if (index == INVALID_CONFIG_INDEX) {
1662 		pr_warn_once("Invalid event id %d\n", mon_info->evtid);
1663 		return;
1664 	}
1665 	wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0);
1666 }
1667 
1668 static void mbm_config_write_domain(struct rdt_resource *r,
1669 				    struct rdt_mon_domain *d, u32 evtid, u32 val)
1670 {
1671 	struct mon_config_info mon_info = {0};
1672 
1673 	/*
1674 	 * Read the current config value first. If both are the same then
1675 	 * no need to write it again.
1676 	 */
1677 	mon_info.evtid = evtid;
1678 	mondata_config_read(d, &mon_info);
1679 	if (mon_info.mon_config == val)
1680 		return;
1681 
1682 	mon_info.mon_config = val;
1683 
1684 	/*
1685 	 * Update MSR_IA32_EVT_CFG_BASE MSR on one of the CPUs in the
1686 	 * domain. The MSRs offset from MSR_IA32_EVT_CFG_BASE
1687 	 * are scoped at the domain level. Writing any of these MSRs
1688 	 * on one CPU is observed by all the CPUs in the domain.
1689 	 */
1690 	smp_call_function_any(&d->hdr.cpu_mask, mon_event_config_write,
1691 			      &mon_info, 1);
1692 
1693 	/*
1694 	 * When an Event Configuration is changed, the bandwidth counters
1695 	 * for all RMIDs and Events will be cleared by the hardware. The
1696 	 * hardware also sets MSR_IA32_QM_CTR.Unavailable (bit 62) for
1697 	 * every RMID on the next read to any event for every RMID.
1698 	 * Subsequent reads will have MSR_IA32_QM_CTR.Unavailable (bit 62)
1699 	 * cleared while it is tracked by the hardware. Clear the
1700 	 * mbm_local and mbm_total counts for all the RMIDs.
1701 	 */
1702 	resctrl_arch_reset_rmid_all(r, d);
1703 }
1704 
1705 static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid)
1706 {
1707 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
1708 	char *dom_str = NULL, *id_str;
1709 	unsigned long dom_id, val;
1710 	struct rdt_mon_domain *d;
1711 
1712 	/* Walking r->domains, ensure it can't race with cpuhp */
1713 	lockdep_assert_cpus_held();
1714 
1715 next:
1716 	if (!tok || tok[0] == '\0')
1717 		return 0;
1718 
1719 	/* Start processing the strings for each domain */
1720 	dom_str = strim(strsep(&tok, ";"));
1721 	id_str = strsep(&dom_str, "=");
1722 
1723 	if (!id_str || kstrtoul(id_str, 10, &dom_id)) {
1724 		rdt_last_cmd_puts("Missing '=' or non-numeric domain id\n");
1725 		return -EINVAL;
1726 	}
1727 
1728 	if (!dom_str || kstrtoul(dom_str, 16, &val)) {
1729 		rdt_last_cmd_puts("Non-numeric event configuration value\n");
1730 		return -EINVAL;
1731 	}
1732 
1733 	/* The value from the user must not select events outside the supported set */
1734 	if ((val & hw_res->mbm_cfg_mask) != val) {
1735 		rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n",
1736 				    hw_res->mbm_cfg_mask);
1737 		return -EINVAL;
1738 	}
1739 
1740 	list_for_each_entry(d, &r->mon_domains, hdr.list) {
1741 		if (d->hdr.id == dom_id) {
1742 			mbm_config_write_domain(r, d, evtid, val);
1743 			goto next;
1744 		}
1745 	}
1746 
1747 	return -EINVAL;
1748 }
1749 
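/*
 * The two write handlers below accept input of the form
 * "<domain_id>=<value>[;<domain_id>=<value>;...]" where <value> is a
 * hexadecimal mask of memory transaction types to count, validated
 * against the platform's mbm_cfg_mask by mon_config_write() above.
 * Illustrative usage (the mask shown is an example only and must be
 * within the platform's supported set):
 *
 *	# echo "0=0x30;1=0x30" > /sys/fs/resctrl/info/L3_MON/mbm_total_bytes_config
 */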
1750 static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of,
1751 					    char *buf, size_t nbytes,
1752 					    loff_t off)
1753 {
1754 	struct rdt_resource *r = of->kn->parent->priv;
1755 	int ret;
1756 
1757 	/* Valid input requires a trailing newline */
1758 	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1759 		return -EINVAL;
1760 
1761 	cpus_read_lock();
1762 	mutex_lock(&rdtgroup_mutex);
1763 
1764 	rdt_last_cmd_clear();
1765 
1766 	buf[nbytes - 1] = '\0';
1767 
1768 	ret = mon_config_write(r, buf, QOS_L3_MBM_TOTAL_EVENT_ID);
1769 
1770 	mutex_unlock(&rdtgroup_mutex);
1771 	cpus_read_unlock();
1772 
1773 	return ret ?: nbytes;
1774 }
1775 
1776 static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of,
1777 					    char *buf, size_t nbytes,
1778 					    loff_t off)
1779 {
1780 	struct rdt_resource *r = of->kn->parent->priv;
1781 	int ret;
1782 
1783 	/* Valid input requires a trailing newline */
1784 	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1785 		return -EINVAL;
1786 
1787 	cpus_read_lock();
1788 	mutex_lock(&rdtgroup_mutex);
1789 
1790 	rdt_last_cmd_clear();
1791 
1792 	buf[nbytes - 1] = '\0';
1793 
1794 	ret = mon_config_write(r, buf, QOS_L3_MBM_LOCAL_EVENT_ID);
1795 
1796 	mutex_unlock(&rdtgroup_mutex);
1797 	cpus_read_unlock();
1798 
1799 	return ret ?: nbytes;
1800 }
1801 
1802 /* rdtgroup information files for one cache resource. */
1803 static struct rftype res_common_files[] = {
1804 	{
1805 		.name		= "last_cmd_status",
1806 		.mode		= 0444,
1807 		.kf_ops		= &rdtgroup_kf_single_ops,
1808 		.seq_show	= rdt_last_cmd_status_show,
1809 		.fflags		= RFTYPE_TOP_INFO,
1810 	},
1811 	{
1812 		.name		= "num_closids",
1813 		.mode		= 0444,
1814 		.kf_ops		= &rdtgroup_kf_single_ops,
1815 		.seq_show	= rdt_num_closids_show,
1816 		.fflags		= RFTYPE_CTRL_INFO,
1817 	},
1818 	{
1819 		.name		= "mon_features",
1820 		.mode		= 0444,
1821 		.kf_ops		= &rdtgroup_kf_single_ops,
1822 		.seq_show	= rdt_mon_features_show,
1823 		.fflags		= RFTYPE_MON_INFO,
1824 	},
1825 	{
1826 		.name		= "num_rmids",
1827 		.mode		= 0444,
1828 		.kf_ops		= &rdtgroup_kf_single_ops,
1829 		.seq_show	= rdt_num_rmids_show,
1830 		.fflags		= RFTYPE_MON_INFO,
1831 	},
1832 	{
1833 		.name		= "cbm_mask",
1834 		.mode		= 0444,
1835 		.kf_ops		= &rdtgroup_kf_single_ops,
1836 		.seq_show	= rdt_default_ctrl_show,
1837 		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
1838 	},
1839 	{
1840 		.name		= "min_cbm_bits",
1841 		.mode		= 0444,
1842 		.kf_ops		= &rdtgroup_kf_single_ops,
1843 		.seq_show	= rdt_min_cbm_bits_show,
1844 		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
1845 	},
1846 	{
1847 		.name		= "shareable_bits",
1848 		.mode		= 0444,
1849 		.kf_ops		= &rdtgroup_kf_single_ops,
1850 		.seq_show	= rdt_shareable_bits_show,
1851 		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
1852 	},
1853 	{
1854 		.name		= "bit_usage",
1855 		.mode		= 0444,
1856 		.kf_ops		= &rdtgroup_kf_single_ops,
1857 		.seq_show	= rdt_bit_usage_show,
1858 		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
1859 	},
1860 	{
1861 		.name		= "min_bandwidth",
1862 		.mode		= 0444,
1863 		.kf_ops		= &rdtgroup_kf_single_ops,
1864 		.seq_show	= rdt_min_bw_show,
1865 		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
1866 	},
1867 	{
1868 		.name		= "bandwidth_gran",
1869 		.mode		= 0444,
1870 		.kf_ops		= &rdtgroup_kf_single_ops,
1871 		.seq_show	= rdt_bw_gran_show,
1872 		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
1873 	},
1874 	{
1875 		.name		= "delay_linear",
1876 		.mode		= 0444,
1877 		.kf_ops		= &rdtgroup_kf_single_ops,
1878 		.seq_show	= rdt_delay_linear_show,
1879 		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
1880 	},
1881 	/*
1882 	 * It is platform specific which (if any) capabilities are provided by
1883 	 * thread_throttle_mode. Defer "fflags" initialization to platform
1884 	 * discovery.
1885 	 */
1886 	{
1887 		.name		= "thread_throttle_mode",
1888 		.mode		= 0444,
1889 		.kf_ops		= &rdtgroup_kf_single_ops,
1890 		.seq_show	= rdt_thread_throttle_mode_show,
1891 	},
1892 	{
1893 		.name		= "max_threshold_occupancy",
1894 		.mode		= 0644,
1895 		.kf_ops		= &rdtgroup_kf_single_ops,
1896 		.write		= max_threshold_occ_write,
1897 		.seq_show	= max_threshold_occ_show,
1898 		.fflags		= RFTYPE_MON_INFO | RFTYPE_RES_CACHE,
1899 	},
1900 	{
1901 		.name		= "mbm_total_bytes_config",
1902 		.mode		= 0644,
1903 		.kf_ops		= &rdtgroup_kf_single_ops,
1904 		.seq_show	= mbm_total_bytes_config_show,
1905 		.write		= mbm_total_bytes_config_write,
1906 	},
1907 	{
1908 		.name		= "mbm_local_bytes_config",
1909 		.mode		= 0644,
1910 		.kf_ops		= &rdtgroup_kf_single_ops,
1911 		.seq_show	= mbm_local_bytes_config_show,
1912 		.write		= mbm_local_bytes_config_write,
1913 	},
1914 	{
1915 		.name		= "cpus",
1916 		.mode		= 0644,
1917 		.kf_ops		= &rdtgroup_kf_single_ops,
1918 		.write		= rdtgroup_cpus_write,
1919 		.seq_show	= rdtgroup_cpus_show,
1920 		.fflags		= RFTYPE_BASE,
1921 	},
1922 	{
1923 		.name		= "cpus_list",
1924 		.mode		= 0644,
1925 		.kf_ops		= &rdtgroup_kf_single_ops,
1926 		.write		= rdtgroup_cpus_write,
1927 		.seq_show	= rdtgroup_cpus_show,
1928 		.flags		= RFTYPE_FLAGS_CPUS_LIST,
1929 		.fflags		= RFTYPE_BASE,
1930 	},
1931 	{
1932 		.name		= "tasks",
1933 		.mode		= 0644,
1934 		.kf_ops		= &rdtgroup_kf_single_ops,
1935 		.write		= rdtgroup_tasks_write,
1936 		.seq_show	= rdtgroup_tasks_show,
1937 		.fflags		= RFTYPE_BASE,
1938 	},
1939 	{
1940 		.name		= "mon_hw_id",
1941 		.mode		= 0444,
1942 		.kf_ops		= &rdtgroup_kf_single_ops,
1943 		.seq_show	= rdtgroup_rmid_show,
1944 		.fflags		= RFTYPE_MON_BASE | RFTYPE_DEBUG,
1945 	},
1946 	{
1947 		.name		= "schemata",
1948 		.mode		= 0644,
1949 		.kf_ops		= &rdtgroup_kf_single_ops,
1950 		.write		= rdtgroup_schemata_write,
1951 		.seq_show	= rdtgroup_schemata_show,
1952 		.fflags		= RFTYPE_CTRL_BASE,
1953 	},
1954 	{
1955 		.name		= "mba_MBps_event",
1956 		.mode		= 0644,
1957 		.kf_ops		= &rdtgroup_kf_single_ops,
1958 		.write		= rdtgroup_mba_mbps_event_write,
1959 		.seq_show	= rdtgroup_mba_mbps_event_show,
1960 	},
1961 	{
1962 		.name		= "mode",
1963 		.mode		= 0644,
1964 		.kf_ops		= &rdtgroup_kf_single_ops,
1965 		.write		= rdtgroup_mode_write,
1966 		.seq_show	= rdtgroup_mode_show,
1967 		.fflags		= RFTYPE_CTRL_BASE,
1968 	},
1969 	{
1970 		.name		= "size",
1971 		.mode		= 0444,
1972 		.kf_ops		= &rdtgroup_kf_single_ops,
1973 		.seq_show	= rdtgroup_size_show,
1974 		.fflags		= RFTYPE_CTRL_BASE,
1975 	},
1976 	{
1977 		.name		= "sparse_masks",
1978 		.mode		= 0444,
1979 		.kf_ops		= &rdtgroup_kf_single_ops,
1980 		.seq_show	= rdt_has_sparse_bitmasks_show,
1981 		.fflags		= RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
1982 	},
1983 	{
1984 		.name		= "ctrl_hw_id",
1985 		.mode		= 0444,
1986 		.kf_ops		= &rdtgroup_kf_single_ops,
1987 		.seq_show	= rdtgroup_closid_show,
1988 		.fflags		= RFTYPE_CTRL_BASE | RFTYPE_DEBUG,
1989 	},
1990 
1991 };
1992 
1993 static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
1994 {
1995 	struct rftype *rfts, *rft;
1996 	int ret, len;
1997 
1998 	rfts = res_common_files;
1999 	len = ARRAY_SIZE(res_common_files);
2000 
2001 	lockdep_assert_held(&rdtgroup_mutex);
2002 
2003 	if (resctrl_debug)
2004 		fflags |= RFTYPE_DEBUG;
2005 
2006 	for (rft = rfts; rft < rfts + len; rft++) {
2007 		if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) {
2008 			ret = rdtgroup_add_file(kn, rft);
2009 			if (ret)
2010 				goto error;
2011 		}
2012 	}
2013 
2014 	return 0;
2015 error:
2016 	pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
2017 	while (--rft >= rfts) {
2018 		if ((fflags & rft->fflags) == rft->fflags)
2019 			kernfs_remove_by_name(kn, rft->name);
2020 	}
2021 	return ret;
2022 }
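
/*
 * Note on the fflags matching above: a file declared with, for example,
 * .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE is only created when the
 * caller's fflags contain both bits, i.e. when populating the info
 * directory of a cache resource. Entries whose .fflags is still zero
 * (such as "thread_throttle_mode" or the mbm_*_bytes_config files) are
 * skipped until resctrl_file_fflags_init() assigns them a value.
 */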
2023 
2024 static struct rftype *rdtgroup_get_rftype_by_name(const char *name)
2025 {
2026 	struct rftype *rfts, *rft;
2027 	int len;
2028 
2029 	rfts = res_common_files;
2030 	len = ARRAY_SIZE(res_common_files);
2031 
2032 	for (rft = rfts; rft < rfts + len; rft++) {
2033 		if (!strcmp(rft->name, name))
2034 			return rft;
2035 	}
2036 
2037 	return NULL;
2038 }
2039 
2040 void resctrl_file_fflags_init(const char *config, unsigned long fflags)
2041 {
2042 	struct rftype *rft;
2043 
2044 	rft = rdtgroup_get_rftype_by_name(config);
2045 	if (rft)
2046 		rft->fflags = fflags;
2047 }
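
/*
 * Sketch of how architecture code is expected to use the helper above:
 * once a deferred feature is discovered it enables the matching file,
 * for example (the flag values shown are illustrative):
 *
 *	resctrl_file_fflags_init("thread_throttle_mode",
 *				 RFTYPE_CTRL_INFO | RFTYPE_RES_MB);
 *
 * so that rdtgroup_add_files() creates the file in the MB resource's
 * info directory when the filesystem is mounted.
 */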
2048 
2049 /**
2050  * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
2051  * @r: The resource group with which the file is associated.
2052  * @name: Name of the file
2053  *
2054  * The permissions of named resctrl file, directory, or link are modified
2055  * to not allow read, write, or execute by any user.
2056  *
2057  * WARNING: This function is intended to communicate to the user that the
2058  * resctrl file has been locked down - that it is not relevant to the
2059  * particular state the system finds itself in. It should not be relied
2060  * on to protect from user access because after the file's permissions
2061  * are restricted the user can still change the permissions using chmod
2062  * from the command line.
2063  *
2064  * Return: 0 on success, <0 on failure.
2065  */
2066 int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name)
2067 {
2068 	struct iattr iattr = {.ia_valid = ATTR_MODE,};
2069 	struct kernfs_node *kn;
2070 	int ret = 0;
2071 
2072 	kn = kernfs_find_and_get_ns(r->kn, name, NULL);
2073 	if (!kn)
2074 		return -ENOENT;
2075 
2076 	switch (kernfs_type(kn)) {
2077 	case KERNFS_DIR:
2078 		iattr.ia_mode = S_IFDIR;
2079 		break;
2080 	case KERNFS_FILE:
2081 		iattr.ia_mode = S_IFREG;
2082 		break;
2083 	case KERNFS_LINK:
2084 		iattr.ia_mode = S_IFLNK;
2085 		break;
2086 	}
2087 
2088 	ret = kernfs_setattr(kn, &iattr);
2089 	kernfs_put(kn);
2090 	return ret;
2091 }
2092 
2093 /**
2094  * rdtgroup_kn_mode_restore - Restore user access to named resctrl file
2095  * @r: The resource group with which the file is associated.
2096  * @name: Name of the file
2097  * @mask: Mask of permissions that should be restored
2098  *
2099  * Restore the permissions of the named file. If @name is a directory the
2100  * permissions of its parent will be used.
2101  *
2102  * Return: 0 on success, <0 on failure.
2103  */
2104 int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
2105 			     umode_t mask)
2106 {
2107 	struct iattr iattr = {.ia_valid = ATTR_MODE,};
2108 	struct kernfs_node *kn, *parent;
2109 	struct rftype *rfts, *rft;
2110 	int ret, len;
2111 
2112 	rfts = res_common_files;
2113 	len = ARRAY_SIZE(res_common_files);
2114 
2115 	for (rft = rfts; rft < rfts + len; rft++) {
2116 		if (!strcmp(rft->name, name))
2117 			iattr.ia_mode = rft->mode & mask;
2118 	}
2119 
2120 	kn = kernfs_find_and_get_ns(r->kn, name, NULL);
2121 	if (!kn)
2122 		return -ENOENT;
2123 
2124 	switch (kernfs_type(kn)) {
2125 	case KERNFS_DIR:
2126 		parent = kernfs_get_parent(kn);
2127 		if (parent) {
2128 			iattr.ia_mode |= parent->mode;
2129 			kernfs_put(parent);
2130 		}
2131 		iattr.ia_mode |= S_IFDIR;
2132 		break;
2133 	case KERNFS_FILE:
2134 		iattr.ia_mode |= S_IFREG;
2135 		break;
2136 	case KERNFS_LINK:
2137 		iattr.ia_mode |= S_IFLNK;
2138 		break;
2139 	}
2140 
2141 	ret = kernfs_setattr(kn, &iattr);
2142 	kernfs_put(kn);
2143 	return ret;
2144 }
2145 
2146 static int rdtgroup_mkdir_info_resdir(void *priv, char *name,
2147 				      unsigned long fflags)
2148 {
2149 	struct kernfs_node *kn_subdir;
2150 	int ret;
2151 
2152 	kn_subdir = kernfs_create_dir(kn_info, name,
2153 				      kn_info->mode, priv);
2154 	if (IS_ERR(kn_subdir))
2155 		return PTR_ERR(kn_subdir);
2156 
2157 	ret = rdtgroup_kn_set_ugid(kn_subdir);
2158 	if (ret)
2159 		return ret;
2160 
2161 	ret = rdtgroup_add_files(kn_subdir, fflags);
2162 	if (!ret)
2163 		kernfs_activate(kn_subdir);
2164 
2165 	return ret;
2166 }
2167 
2168 static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
2169 {
2170 	struct resctrl_schema *s;
2171 	struct rdt_resource *r;
2172 	unsigned long fflags;
2173 	char name[32];
2174 	int ret;
2175 
2176 	/* create the directory */
2177 	kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
2178 	if (IS_ERR(kn_info))
2179 		return PTR_ERR(kn_info);
2180 
2181 	ret = rdtgroup_add_files(kn_info, RFTYPE_TOP_INFO);
2182 	if (ret)
2183 		goto out_destroy;
2184 
2185 	/* loop over enabled controls, these are all alloc_capable */
2186 	list_for_each_entry(s, &resctrl_schema_all, list) {
2187 		r = s->res;
2188 		fflags = r->fflags | RFTYPE_CTRL_INFO;
2189 		ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags);
2190 		if (ret)
2191 			goto out_destroy;
2192 	}
2193 
2194 	for_each_mon_capable_rdt_resource(r) {
2195 		fflags = r->fflags | RFTYPE_MON_INFO;
2196 		sprintf(name, "%s_MON", r->name);
2197 		ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
2198 		if (ret)
2199 			goto out_destroy;
2200 	}
2201 
2202 	ret = rdtgroup_kn_set_ugid(kn_info);
2203 	if (ret)
2204 		goto out_destroy;
2205 
2206 	kernfs_activate(kn_info);
2207 
2208 	return 0;
2209 
2210 out_destroy:
2211 	kernfs_remove(kn_info);
2212 	return ret;
2213 }
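
/*
 * An illustrative layout of the "info" directory created above (the
 * exact set of subdirectories depends on the resources and monitoring
 * features the platform supports):
 *
 *	info/last_cmd_status
 *	info/L3/		(one directory per enabled control/schema)
 *	info/L3_MON/		(one directory per monitor capable resource)
 *	info/MB/
 */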
2214 
2215 static int
2216 mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
2217 		    char *name, struct kernfs_node **dest_kn)
2218 {
2219 	struct kernfs_node *kn;
2220 	int ret;
2221 
2222 	/* create the directory */
2223 	kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
2224 	if (IS_ERR(kn))
2225 		return PTR_ERR(kn);
2226 
2227 	if (dest_kn)
2228 		*dest_kn = kn;
2229 
2230 	ret = rdtgroup_kn_set_ugid(kn);
2231 	if (ret)
2232 		goto out_destroy;
2233 
2234 	kernfs_activate(kn);
2235 
2236 	return 0;
2237 
2238 out_destroy:
2239 	kernfs_remove(kn);
2240 	return ret;
2241 }
2242 
2243 static void l3_qos_cfg_update(void *arg)
2244 {
2245 	bool *enable = arg;
2246 
2247 	wrmsrl(MSR_IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
2248 }
2249 
2250 static void l2_qos_cfg_update(void *arg)
2251 {
2252 	bool *enable = arg;
2253 
2254 	wrmsrl(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
2255 }
2256 
2257 static inline bool is_mba_linear(void)
2258 {
2259 	return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.delay_linear;
2260 }
2261 
2262 static int set_cache_qos_cfg(int level, bool enable)
2263 {
2264 	void (*update)(void *arg);
2265 	struct rdt_ctrl_domain *d;
2266 	struct rdt_resource *r_l;
2267 	cpumask_var_t cpu_mask;
2268 	int cpu;
2269 
2270 	/* Walking r->domains, ensure it can't race with cpuhp */
2271 	lockdep_assert_cpus_held();
2272 
2273 	if (level == RDT_RESOURCE_L3)
2274 		update = l3_qos_cfg_update;
2275 	else if (level == RDT_RESOURCE_L2)
2276 		update = l2_qos_cfg_update;
2277 	else
2278 		return -EINVAL;
2279 
2280 	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
2281 		return -ENOMEM;
2282 
2283 	r_l = &rdt_resources_all[level].r_resctrl;
2284 	list_for_each_entry(d, &r_l->ctrl_domains, hdr.list) {
2285 		if (r_l->cache.arch_has_per_cpu_cfg)
2286 			/* Pick all the CPUs in the domain instance */
2287 			for_each_cpu(cpu, &d->hdr.cpu_mask)
2288 				cpumask_set_cpu(cpu, cpu_mask);
2289 		else
2290 			/* Pick one CPU from each domain instance to update MSR */
2291 			cpumask_set_cpu(cpumask_any(&d->hdr.cpu_mask), cpu_mask);
2292 	}
2293 
2294 	/* Update QOS_CFG MSR on all the CPUs in cpu_mask */
2295 	on_each_cpu_mask(cpu_mask, update, &enable, 1);
2296 
2297 	free_cpumask_var(cpu_mask);
2298 
2299 	return 0;
2300 }
2301 
2302 /* Restore the qos cfg state when a domain comes online */
2303 void rdt_domain_reconfigure_cdp(struct rdt_resource *r)
2304 {
2305 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
2306 
2307 	if (!r->cdp_capable)
2308 		return;
2309 
2310 	if (r->rid == RDT_RESOURCE_L2)
2311 		l2_qos_cfg_update(&hw_res->cdp_enabled);
2312 
2313 	if (r->rid == RDT_RESOURCE_L3)
2314 		l3_qos_cfg_update(&hw_res->cdp_enabled);
2315 }
2316 
2317 static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_ctrl_domain *d)
2318 {
2319 	u32 num_closid = resctrl_arch_get_num_closid(r);
2320 	int cpu = cpumask_any(&d->hdr.cpu_mask);
2321 	int i;
2322 
2323 	d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val),
2324 				   GFP_KERNEL, cpu_to_node(cpu));
2325 	if (!d->mbps_val)
2326 		return -ENOMEM;
2327 
2328 	for (i = 0; i < num_closid; i++)
2329 		d->mbps_val[i] = MBA_MAX_MBPS;
2330 
2331 	return 0;
2332 }
2333 
2334 static void mba_sc_domain_destroy(struct rdt_resource *r,
2335 				  struct rdt_ctrl_domain *d)
2336 {
2337 	kfree(d->mbps_val);
2338 	d->mbps_val = NULL;
2339 }
2340 
2341 /*
2342  * MBA software controller is supported only if
2343  * MBM is supported and MBA is in linear scale,
2344  * and the MBM monitor scope is the same as MBA
2345  * control scope.
2346  */
2347 static bool supports_mba_mbps(void)
2348 {
2349 	struct rdt_resource *rmbm = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
2350 	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
2351 
2352 	return (is_mbm_enabled() &&
2353 		r->alloc_capable && is_mba_linear() &&
2354 		r->ctrl_scope == rmbm->mon_scope);
2355 }
2356 
2357 /*
2358  * Enable or disable the MBA software controller
2359  * which helps user specify bandwidth in MBps.
2360  */
2361 static int set_mba_sc(bool mba_sc)
2362 {
2363 	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
2364 	u32 num_closid = resctrl_arch_get_num_closid(r);
2365 	struct rdt_ctrl_domain *d;
2366 	unsigned long fflags;
2367 	int i;
2368 
2369 	if (!supports_mba_mbps() || mba_sc == is_mba_sc(r))
2370 		return -EINVAL;
2371 
2372 	r->membw.mba_sc = mba_sc;
2373 
2374 	rdtgroup_default.mba_mbps_event = mba_mbps_default_event;
2375 
2376 	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
2377 		for (i = 0; i < num_closid; i++)
2378 			d->mbps_val[i] = MBA_MAX_MBPS;
2379 	}
2380 
2381 	fflags = mba_sc ? RFTYPE_CTRL_BASE | RFTYPE_MON_BASE : 0;
2382 	resctrl_file_fflags_init("mba_MBps_event", fflags);
2383 
2384 	return 0;
2385 }
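
/*
 * Rough summary of the effect of set_mba_sc() above: with the software
 * controller enabled, the "MB:" values written to a group's schemata
 * file are interpreted as bandwidth in MBps rather than as the
 * hardware's native percentage/delay values, and the per-CLOSID
 * mbps_val[] targets initialized above are used by the MBM overflow
 * handler to steer the hardware MBA setting towards the requested
 * bandwidth.
 */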
2386 
2387 static int cdp_enable(int level)
2388 {
2389 	struct rdt_resource *r_l = &rdt_resources_all[level].r_resctrl;
2390 	int ret;
2391 
2392 	if (!r_l->alloc_capable)
2393 		return -EINVAL;
2394 
2395 	ret = set_cache_qos_cfg(level, true);
2396 	if (!ret)
2397 		rdt_resources_all[level].cdp_enabled = true;
2398 
2399 	return ret;
2400 }
2401 
2402 static void cdp_disable(int level)
2403 {
2404 	struct rdt_hw_resource *r_hw = &rdt_resources_all[level];
2405 
2406 	if (r_hw->cdp_enabled) {
2407 		set_cache_qos_cfg(level, false);
2408 		r_hw->cdp_enabled = false;
2409 	}
2410 }
2411 
2412 int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable)
2413 {
2414 	struct rdt_hw_resource *hw_res = &rdt_resources_all[l];
2415 
2416 	if (!hw_res->r_resctrl.cdp_capable)
2417 		return -EINVAL;
2418 
2419 	if (enable)
2420 		return cdp_enable(l);
2421 
2422 	cdp_disable(l);
2423 
2424 	return 0;
2425 }
2426 
2427 /*
2428  * We don't allow rdtgroup directories to be created anywhere
2429  * except the root directory. Thus when looking for the rdtgroup
2430  * structure for a kernfs node we are either looking at a directory,
2431  * in which case the rdtgroup structure is pointed at by the "priv"
2432  * field, or at a file, in which case we need only look to the parent
2433  * to find the rdtgroup.
2434  */
2435 static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
2436 {
2437 	if (kernfs_type(kn) == KERNFS_DIR) {
2438 		/*
2439 		 * All the resource directories use "kn->priv"
2440 		 * to point to the "struct rdtgroup" for the
2441 		 * resource. "info" and its subdirectories don't
2442 		 * have rdtgroup structures, so return NULL here.
2443 		 */
2444 		if (kn == kn_info || kn->parent == kn_info)
2445 			return NULL;
2446 		else
2447 			return kn->priv;
2448 	} else {
2449 		return kn->parent->priv;
2450 	}
2451 }
2452 
2453 static void rdtgroup_kn_get(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
2454 {
2455 	atomic_inc(&rdtgrp->waitcount);
2456 	kernfs_break_active_protection(kn);
2457 }
2458 
2459 static void rdtgroup_kn_put(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
2460 {
2461 	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
2462 	    (rdtgrp->flags & RDT_DELETED)) {
2463 		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2464 		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
2465 			rdtgroup_pseudo_lock_remove(rdtgrp);
2466 		kernfs_unbreak_active_protection(kn);
2467 		rdtgroup_remove(rdtgrp);
2468 	} else {
2469 		kernfs_unbreak_active_protection(kn);
2470 	}
2471 }
2472 
2473 struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
2474 {
2475 	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
2476 
2477 	if (!rdtgrp)
2478 		return NULL;
2479 
2480 	rdtgroup_kn_get(rdtgrp, kn);
2481 
2482 	cpus_read_lock();
2483 	mutex_lock(&rdtgroup_mutex);
2484 
2485 	/* Was this group deleted while we waited? */
2486 	if (rdtgrp->flags & RDT_DELETED)
2487 		return NULL;
2488 
2489 	return rdtgrp;
2490 }
2491 
2492 void rdtgroup_kn_unlock(struct kernfs_node *kn)
2493 {
2494 	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
2495 
2496 	if (!rdtgrp)
2497 		return;
2498 
2499 	mutex_unlock(&rdtgroup_mutex);
2500 	cpus_read_unlock();
2501 
2502 	rdtgroup_kn_put(rdtgrp, kn);
2503 }
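
/*
 * Sketch of the typical use of the pair above in a resctrl file handler
 * (the error code shown is an example):
 *
 *	rdtgrp = rdtgroup_kn_lock_live(of->kn);
 *	if (rdtgrp)
 *		ret = ...;	(operate on rdtgrp under rdtgroup_mutex)
 *	else
 *		ret = -ENOENT;
 *	rdtgroup_kn_unlock(of->kn);
 *
 * rdtgroup_kn_unlock() is called unconditionally so that the reference
 * and active protection taken by rdtgroup_kn_get() are always dropped.
 */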
2504 
2505 static int mkdir_mondata_all(struct kernfs_node *parent_kn,
2506 			     struct rdtgroup *prgrp,
2507 			     struct kernfs_node **mon_data_kn);
2508 
2509 static void rdt_disable_ctx(void)
2510 {
2511 	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
2512 	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
2513 	set_mba_sc(false);
2514 
2515 	resctrl_debug = false;
2516 }
2517 
2518 static int rdt_enable_ctx(struct rdt_fs_context *ctx)
2519 {
2520 	int ret = 0;
2521 
2522 	if (ctx->enable_cdpl2) {
2523 		ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true);
2524 		if (ret)
2525 			goto out_done;
2526 	}
2527 
2528 	if (ctx->enable_cdpl3) {
2529 		ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true);
2530 		if (ret)
2531 			goto out_cdpl2;
2532 	}
2533 
2534 	if (ctx->enable_mba_mbps) {
2535 		ret = set_mba_sc(true);
2536 		if (ret)
2537 			goto out_cdpl3;
2538 	}
2539 
2540 	if (ctx->enable_debug)
2541 		resctrl_debug = true;
2542 
2543 	return 0;
2544 
2545 out_cdpl3:
2546 	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
2547 out_cdpl2:
2548 	resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
2549 out_done:
2550 	return ret;
2551 }
2552 
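/*
 * Each schema added below becomes one line of every group's schemata
 * file. With CDP enabled on L3, for example, the resource contributes
 * two schemata named "L3CODE" and "L3DATA"; without CDP it contributes a
 * single "L3" entry such as "L3:0=fffff;1=fffff" (the masks shown are
 * illustrative only).
 */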
2553 static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type)
2554 {
2555 	struct resctrl_schema *s;
2556 	const char *suffix = "";
2557 	int ret, cl;
2558 
2559 	s = kzalloc(sizeof(*s), GFP_KERNEL);
2560 	if (!s)
2561 		return -ENOMEM;
2562 
2563 	s->res = r;
2564 	s->num_closid = resctrl_arch_get_num_closid(r);
2565 	if (resctrl_arch_get_cdp_enabled(r->rid))
2566 		s->num_closid /= 2;
2567 
2568 	s->conf_type = type;
2569 	switch (type) {
2570 	case CDP_CODE:
2571 		suffix = "CODE";
2572 		break;
2573 	case CDP_DATA:
2574 		suffix = "DATA";
2575 		break;
2576 	case CDP_NONE:
2577 		suffix = "";
2578 		break;
2579 	}
2580 
2581 	ret = snprintf(s->name, sizeof(s->name), "%s%s", r->name, suffix);
2582 	if (ret >= sizeof(s->name)) {
2583 		kfree(s);
2584 		return -EINVAL;
2585 	}
2586 
2587 	cl = strlen(s->name);
2588 
2589 	/*
2590 	 * If CDP is supported by this resource, but not enabled,
2591 	 * include the suffix. This ensures the tabular format of the
2592 	 * schemata file does not change between mounts of the filesystem.
2593 	 */
2594 	if (r->cdp_capable && !resctrl_arch_get_cdp_enabled(r->rid))
2595 		cl += 4;
2596 
2597 	if (cl > max_name_width)
2598 		max_name_width = cl;
2599 
2600 	INIT_LIST_HEAD(&s->list);
2601 	list_add(&s->list, &resctrl_schema_all);
2602 
2603 	return 0;
2604 }
2605 
2606 static int schemata_list_create(void)
2607 {
2608 	struct rdt_resource *r;
2609 	int ret = 0;
2610 
2611 	for_each_alloc_capable_rdt_resource(r) {
2612 		if (resctrl_arch_get_cdp_enabled(r->rid)) {
2613 			ret = schemata_list_add(r, CDP_CODE);
2614 			if (ret)
2615 				break;
2616 
2617 			ret = schemata_list_add(r, CDP_DATA);
2618 		} else {
2619 			ret = schemata_list_add(r, CDP_NONE);
2620 		}
2621 
2622 		if (ret)
2623 			break;
2624 	}
2625 
2626 	return ret;
2627 }
2628 
2629 static void schemata_list_destroy(void)
2630 {
2631 	struct resctrl_schema *s, *tmp;
2632 
2633 	list_for_each_entry_safe(s, tmp, &resctrl_schema_all, list) {
2634 		list_del(&s->list);
2635 		kfree(s);
2636 	}
2637 }
2638 
2639 static int rdt_get_tree(struct fs_context *fc)
2640 {
2641 	struct rdt_fs_context *ctx = rdt_fc2context(fc);
2642 	unsigned long flags = RFTYPE_CTRL_BASE;
2643 	struct rdt_mon_domain *dom;
2644 	struct rdt_resource *r;
2645 	int ret;
2646 
2647 	cpus_read_lock();
2648 	mutex_lock(&rdtgroup_mutex);
2649 	/*
2650 	 * resctrl file system can only be mounted once.
2651 	 */
2652 	if (resctrl_mounted) {
2653 		ret = -EBUSY;
2654 		goto out;
2655 	}
2656 
2657 	ret = rdtgroup_setup_root(ctx);
2658 	if (ret)
2659 		goto out;
2660 
2661 	ret = rdt_enable_ctx(ctx);
2662 	if (ret)
2663 		goto out_root;
2664 
2665 	ret = schemata_list_create();
2666 	if (ret) {
2667 		schemata_list_destroy();
2668 		goto out_ctx;
2669 	}
2670 
2671 	closid_init();
2672 
2673 	if (resctrl_arch_mon_capable())
2674 		flags |= RFTYPE_MON;
2675 
2676 	ret = rdtgroup_add_files(rdtgroup_default.kn, flags);
2677 	if (ret)
2678 		goto out_schemata_free;
2679 
2680 	kernfs_activate(rdtgroup_default.kn);
2681 
2682 	ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
2683 	if (ret < 0)
2684 		goto out_schemata_free;
2685 
2686 	if (resctrl_arch_mon_capable()) {
2687 		ret = mongroup_create_dir(rdtgroup_default.kn,
2688 					  &rdtgroup_default, "mon_groups",
2689 					  &kn_mongrp);
2690 		if (ret < 0)
2691 			goto out_info;
2692 
2693 		ret = mkdir_mondata_all(rdtgroup_default.kn,
2694 					&rdtgroup_default, &kn_mondata);
2695 		if (ret < 0)
2696 			goto out_mongrp;
2697 		rdtgroup_default.mon.mon_data_kn = kn_mondata;
2698 	}
2699 
2700 	ret = rdt_pseudo_lock_init();
2701 	if (ret)
2702 		goto out_mondata;
2703 
2704 	ret = kernfs_get_tree(fc);
2705 	if (ret < 0)
2706 		goto out_psl;
2707 
2708 	if (resctrl_arch_alloc_capable())
2709 		resctrl_arch_enable_alloc();
2710 	if (resctrl_arch_mon_capable())
2711 		resctrl_arch_enable_mon();
2712 
2713 	if (resctrl_arch_alloc_capable() || resctrl_arch_mon_capable())
2714 		resctrl_mounted = true;
2715 
2716 	if (is_mbm_enabled()) {
2717 		r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
2718 		list_for_each_entry(dom, &r->mon_domains, hdr.list)
2719 			mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
2720 						   RESCTRL_PICK_ANY_CPU);
2721 	}
2722 
2723 	goto out;
2724 
2725 out_psl:
2726 	rdt_pseudo_lock_release();
2727 out_mondata:
2728 	if (resctrl_arch_mon_capable())
2729 		kernfs_remove(kn_mondata);
2730 out_mongrp:
2731 	if (resctrl_arch_mon_capable())
2732 		kernfs_remove(kn_mongrp);
2733 out_info:
2734 	kernfs_remove(kn_info);
2735 out_schemata_free:
2736 	schemata_list_destroy();
2737 out_ctx:
2738 	rdt_disable_ctx();
2739 out_root:
2740 	rdtgroup_destroy_root();
2741 out:
2742 	rdt_last_cmd_clear();
2743 	mutex_unlock(&rdtgroup_mutex);
2744 	cpus_read_unlock();
2745 	return ret;
2746 }
2747 
2748 enum rdt_param {
2749 	Opt_cdp,
2750 	Opt_cdpl2,
2751 	Opt_mba_mbps,
2752 	Opt_debug,
2753 	nr__rdt_params
2754 };
2755 
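/*
 * Mount options accepted by the filesystem, for example:
 *
 *	# mount -t resctrl resctrl [-o cdp[,cdpl2][,mba_MBps][,debug]] /sys/fs/resctrl
 */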
2756 static const struct fs_parameter_spec rdt_fs_parameters[] = {
2757 	fsparam_flag("cdp",		Opt_cdp),
2758 	fsparam_flag("cdpl2",		Opt_cdpl2),
2759 	fsparam_flag("mba_MBps",	Opt_mba_mbps),
2760 	fsparam_flag("debug",		Opt_debug),
2761 	{}
2762 };
2763 
2764 static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
2765 {
2766 	struct rdt_fs_context *ctx = rdt_fc2context(fc);
2767 	struct fs_parse_result result;
2768 	const char *msg;
2769 	int opt;
2770 
2771 	opt = fs_parse(fc, rdt_fs_parameters, param, &result);
2772 	if (opt < 0)
2773 		return opt;
2774 
2775 	switch (opt) {
2776 	case Opt_cdp:
2777 		ctx->enable_cdpl3 = true;
2778 		return 0;
2779 	case Opt_cdpl2:
2780 		ctx->enable_cdpl2 = true;
2781 		return 0;
2782 	case Opt_mba_mbps:
2783 		msg = "mba_MBps requires MBM and linear scale MBA at L3 scope";
2784 		if (!supports_mba_mbps())
2785 			return invalfc(fc, msg);
2786 		ctx->enable_mba_mbps = true;
2787 		return 0;
2788 	case Opt_debug:
2789 		ctx->enable_debug = true;
2790 		return 0;
2791 	}
2792 
2793 	return -EINVAL;
2794 }
2795 
2796 static void rdt_fs_context_free(struct fs_context *fc)
2797 {
2798 	struct rdt_fs_context *ctx = rdt_fc2context(fc);
2799 
2800 	kernfs_free_fs_context(fc);
2801 	kfree(ctx);
2802 }
2803 
2804 static const struct fs_context_operations rdt_fs_context_ops = {
2805 	.free		= rdt_fs_context_free,
2806 	.parse_param	= rdt_parse_param,
2807 	.get_tree	= rdt_get_tree,
2808 };
2809 
2810 static int rdt_init_fs_context(struct fs_context *fc)
2811 {
2812 	struct rdt_fs_context *ctx;
2813 
2814 	ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
2815 	if (!ctx)
2816 		return -ENOMEM;
2817 
2818 	ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
2819 	fc->fs_private = &ctx->kfc;
2820 	fc->ops = &rdt_fs_context_ops;
2821 	put_user_ns(fc->user_ns);
2822 	fc->user_ns = get_user_ns(&init_user_ns);
2823 	fc->global = true;
2824 	return 0;
2825 }
2826 
2827 static int reset_all_ctrls(struct rdt_resource *r)
2828 {
2829 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
2830 	struct rdt_hw_ctrl_domain *hw_dom;
2831 	struct msr_param msr_param;
2832 	struct rdt_ctrl_domain *d;
2833 	int i;
2834 
2835 	/* Walking r->domains, ensure it can't race with cpuhp */
2836 	lockdep_assert_cpus_held();
2837 
2838 	msr_param.res = r;
2839 	msr_param.low = 0;
2840 	msr_param.high = hw_res->num_closid;
2841 
2842 	/*
2843 	 * Disable resource control for this resource by setting all
2844 	 * CBMs in all ctrl_domains to the maximum mask value. Pick one CPU
2845 	 * from each domain to update the MSRs below.
2846 	 */
2847 	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
2848 		hw_dom = resctrl_to_arch_ctrl_dom(d);
2849 
2850 		for (i = 0; i < hw_res->num_closid; i++)
2851 			hw_dom->ctrl_val[i] = r->default_ctrl;
2852 		msr_param.dom = d;
2853 		smp_call_function_any(&d->hdr.cpu_mask, rdt_ctrl_update, &msr_param, 1);
2854 	}
2855 
2856 	return 0;
2857 }
2858 
2859 /*
2860  * Move tasks from one to the other group. If @from is NULL, then all tasks
2861  * in the system are moved unconditionally (used for teardown).
2862  *
2863  * If @mask is not NULL the cpus on which moved tasks are running are set
2864  * in that mask so the update smp function call is restricted to affected
2865  * cpus.
2866  */
2867 static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
2868 				 struct cpumask *mask)
2869 {
2870 	struct task_struct *p, *t;
2871 
2872 	read_lock(&tasklist_lock);
2873 	for_each_process_thread(p, t) {
2874 		if (!from || is_closid_match(t, from) ||
2875 		    is_rmid_match(t, from)) {
2876 			resctrl_arch_set_closid_rmid(t, to->closid,
2877 						     to->mon.rmid);
2878 
2879 			/*
2880 			 * Order the closid/rmid stores above before the loads
2881 			 * in task_curr(). This pairs with the full barrier
2882 			 * between the rq->curr update and resctrl_sched_in()
2883 			 * during context switch.
2884 			 */
2885 			smp_mb();
2886 
2887 			/*
2888 			 * If the task is on a CPU, set the CPU in the mask.
2889 			 * The detection is inaccurate as tasks might move or
2890 			 * schedule before the smp function call takes place.
2891 			 * In such a case the function call is pointless, but
2892 			 * there is no other side effect.
2893 			 */
2894 			if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t))
2895 				cpumask_set_cpu(task_cpu(t), mask);
2896 		}
2897 	}
2898 	read_unlock(&tasklist_lock);
2899 }
2900 
2901 static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
2902 {
2903 	struct rdtgroup *sentry, *stmp;
2904 	struct list_head *head;
2905 
2906 	head = &rdtgrp->mon.crdtgrp_list;
2907 	list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
2908 		free_rmid(sentry->closid, sentry->mon.rmid);
2909 		list_del(&sentry->mon.crdtgrp_list);
2910 
2911 		if (atomic_read(&sentry->waitcount) != 0)
2912 			sentry->flags = RDT_DELETED;
2913 		else
2914 			rdtgroup_remove(sentry);
2915 	}
2916 }
2917 
2918 /*
2919  * Forcibly remove all subdirectories under root.
2920  */
2921 static void rmdir_all_sub(void)
2922 {
2923 	struct rdtgroup *rdtgrp, *tmp;
2924 
2925 	/* Move all tasks to the default resource group */
2926 	rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);
2927 
2928 	list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
2929 		/* Free any child rmids */
2930 		free_all_child_rdtgrp(rdtgrp);
2931 
2932 		/* Remove each rdtgroup other than root */
2933 		if (rdtgrp == &rdtgroup_default)
2934 			continue;
2935 
2936 		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2937 		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
2938 			rdtgroup_pseudo_lock_remove(rdtgrp);
2939 
2940 		/*
2941 		 * Give any CPUs back to the default group. We cannot copy
2942 		 * cpu_online_mask because a CPU might have executed the
2943 		 * offline callback already, but is still marked online.
2944 		 */
2945 		cpumask_or(&rdtgroup_default.cpu_mask,
2946 			   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
2947 
2948 		free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
2949 
2950 		kernfs_remove(rdtgrp->kn);
2951 		list_del(&rdtgrp->rdtgroup_list);
2952 
2953 		if (atomic_read(&rdtgrp->waitcount) != 0)
2954 			rdtgrp->flags = RDT_DELETED;
2955 		else
2956 			rdtgroup_remove(rdtgrp);
2957 	}
2958 	/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
2959 	update_closid_rmid(cpu_online_mask, &rdtgroup_default);
2960 
2961 	kernfs_remove(kn_info);
2962 	kernfs_remove(kn_mongrp);
2963 	kernfs_remove(kn_mondata);
2964 }
2965 
2966 static void rdt_kill_sb(struct super_block *sb)
2967 {
2968 	struct rdt_resource *r;
2969 
2970 	cpus_read_lock();
2971 	mutex_lock(&rdtgroup_mutex);
2972 
2973 	rdt_disable_ctx();
2974 
2975 	/* Put everything back to default values. */
2976 	for_each_alloc_capable_rdt_resource(r)
2977 		reset_all_ctrls(r);
2978 	rmdir_all_sub();
2979 	rdt_pseudo_lock_release();
2980 	rdtgroup_default.mode = RDT_MODE_SHAREABLE;
2981 	schemata_list_destroy();
2982 	rdtgroup_destroy_root();
2983 	if (resctrl_arch_alloc_capable())
2984 		resctrl_arch_disable_alloc();
2985 	if (resctrl_arch_mon_capable())
2986 		resctrl_arch_disable_mon();
2987 	resctrl_mounted = false;
2988 	kernfs_kill_sb(sb);
2989 	mutex_unlock(&rdtgroup_mutex);
2990 	cpus_read_unlock();
2991 }
2992 
2993 static struct file_system_type rdt_fs_type = {
2994 	.name			= "resctrl",
2995 	.init_fs_context	= rdt_init_fs_context,
2996 	.parameters		= rdt_fs_parameters,
2997 	.kill_sb		= rdt_kill_sb,
2998 };
2999 
3000 static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
3001 		       void *priv)
3002 {
3003 	struct kernfs_node *kn;
3004 	int ret = 0;
3005 
3006 	kn = __kernfs_create_file(parent_kn, name, 0444,
3007 				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
3008 				  &kf_mondata_ops, priv, NULL, NULL);
3009 	if (IS_ERR(kn))
3010 		return PTR_ERR(kn);
3011 
3012 	ret = rdtgroup_kn_set_ugid(kn);
3013 	if (ret) {
3014 		kernfs_remove(kn);
3015 		return ret;
3016 	}
3017 
3018 	return ret;
3019 }
3020 
3021 static void mon_rmdir_one_subdir(struct kernfs_node *pkn, char *name, char *subname)
3022 {
3023 	struct kernfs_node *kn;
3024 
3025 	kn = kernfs_find_and_get(pkn, name);
3026 	if (!kn)
3027 		return;
3028 	kernfs_put(kn);
3029 
3030 	if (kn->dir.subdirs <= 1)
3031 		kernfs_remove(kn);
3032 	else
3033 		kernfs_remove_by_name(kn, subname);
3034 }
3035 
3036 /*
3037  * Remove all subdirectories of mon_data of ctrl_mon groups
3038  * and monitor groups for the given domain.
3039  * Remove files and directories containing "sum" of domain data
3040  * when the last domain being summed is removed.
3041  */
3042 static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
3043 					   struct rdt_mon_domain *d)
3044 {
3045 	struct rdtgroup *prgrp, *crgrp;
3046 	char subname[32];
3047 	bool snc_mode;
3048 	char name[32];
3049 
3050 	snc_mode = r->mon_scope == RESCTRL_L3_NODE;
3051 	sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
3052 	if (snc_mode)
3053 		sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id);
3054 
3055 	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
3056 		mon_rmdir_one_subdir(prgrp->mon.mon_data_kn, name, subname);
3057 
3058 		list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
3059 			mon_rmdir_one_subdir(crgrp->mon.mon_data_kn, name, subname);
3060 	}
3061 }
3062 
3063 static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
3064 			     struct rdt_resource *r, struct rdtgroup *prgrp,
3065 			     bool do_sum)
3066 {
3067 	struct rmid_read rr = {0};
3068 	union mon_data_bits priv;
3069 	struct mon_evt *mevt;
3070 	int ret;
3071 
3072 	if (WARN_ON(list_empty(&r->evt_list)))
3073 		return -EPERM;
3074 
3075 	priv.u.rid = r->rid;
3076 	priv.u.domid = do_sum ? d->ci->id : d->hdr.id;
3077 	priv.u.sum = do_sum;
3078 	list_for_each_entry(mevt, &r->evt_list, list) {
3079 		priv.u.evtid = mevt->evtid;
3080 		ret = mon_addfile(kn, mevt->name, priv.priv);
3081 		if (ret)
3082 			return ret;
3083 
3084 		if (!do_sum && is_mbm_event(mevt->evtid))
3085 			mon_event_read(&rr, r, d, prgrp, &d->hdr.cpu_mask, mevt->evtid, true);
3086 	}
3087 
3088 	return 0;
3089 }
3090 
3091 static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
3092 				struct rdt_mon_domain *d,
3093 				struct rdt_resource *r, struct rdtgroup *prgrp)
3094 {
3095 	struct kernfs_node *kn, *ckn;
3096 	char name[32];
3097 	bool snc_mode;
3098 	int ret = 0;
3099 
3100 	lockdep_assert_held(&rdtgroup_mutex);
3101 
3102 	snc_mode = r->mon_scope == RESCTRL_L3_NODE;
3103 	sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
3104 	kn = kernfs_find_and_get(parent_kn, name);
3105 	if (kn) {
3106 		/*
3107 		 * rdtgroup_mutex will prevent this directory from being
3108 		 * removed. No need to keep this hold.
3109 		 */
3110 		kernfs_put(kn);
3111 	} else {
3112 		kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
3113 		if (IS_ERR(kn))
3114 			return PTR_ERR(kn);
3115 
3116 		ret = rdtgroup_kn_set_ugid(kn);
3117 		if (ret)
3118 			goto out_destroy;
3119 		ret = mon_add_all_files(kn, d, r, prgrp, snc_mode);
3120 		if (ret)
3121 			goto out_destroy;
3122 	}
3123 
3124 	if (snc_mode) {
3125 		sprintf(name, "mon_sub_%s_%02d", r->name, d->hdr.id);
3126 		ckn = kernfs_create_dir(kn, name, parent_kn->mode, prgrp);
3127 		if (IS_ERR(ckn)) {
3128 			ret = -EINVAL;
3129 			goto out_destroy;
3130 		}
3131 
3132 		ret = rdtgroup_kn_set_ugid(ckn);
3133 		if (ret)
3134 			goto out_destroy;
3135 
3136 		ret = mon_add_all_files(ckn, d, r, prgrp, false);
3137 		if (ret)
3138 			goto out_destroy;
3139 	}
3140 
3141 	kernfs_activate(kn);
3142 	return 0;
3143 
3144 out_destroy:
3145 	kernfs_remove(kn);
3146 	return ret;
3147 }
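
/*
 * Illustrative result of the above when Sub-NUMA Cluster (SNC) mode is
 * in effect (mon_scope == RESCTRL_L3_NODE): each per-L3-cache directory
 * contains one sub-directory per NUMA node sharing that cache, e.g.
 *
 *	mon_data/mon_L3_00/mon_sub_L3_00/
 *	mon_data/mon_L3_00/mon_sub_L3_01/
 *
 * and the event files directly under mon_L3_00 report the sum across
 * its mon_sub_* directories.
 */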
3148 
3149 /*
3150  * Add all subdirectories of mon_data for "ctrl_mon" groups
3151  * and "monitor" groups with the given domain id.
3152  */
3153 static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
3154 					   struct rdt_mon_domain *d)
3155 {
3156 	struct kernfs_node *parent_kn;
3157 	struct rdtgroup *prgrp, *crgrp;
3158 	struct list_head *head;
3159 
3160 	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
3161 		parent_kn = prgrp->mon.mon_data_kn;
3162 		mkdir_mondata_subdir(parent_kn, d, r, prgrp);
3163 
3164 		head = &prgrp->mon.crdtgrp_list;
3165 		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
3166 			parent_kn = crgrp->mon.mon_data_kn;
3167 			mkdir_mondata_subdir(parent_kn, d, r, crgrp);
3168 		}
3169 	}
3170 }
3171 
3172 static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
3173 				       struct rdt_resource *r,
3174 				       struct rdtgroup *prgrp)
3175 {
3176 	struct rdt_mon_domain *dom;
3177 	int ret;
3178 
3179 	/* Walking r->domains, ensure it can't race with cpuhp */
3180 	lockdep_assert_cpus_held();
3181 
3182 	list_for_each_entry(dom, &r->mon_domains, hdr.list) {
3183 		ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
3184 		if (ret)
3185 			return ret;
3186 	}
3187 
3188 	return 0;
3189 }
3190 
3191 /*
3192  * This creates a directory mon_data which contains the monitored data.
3193  *
3194  * mon_data has one directory for each domain, named in the format
3195  * mon_<domain_name>_<domain_id>. For example, a mon_data directory
3196  * with L3 domains looks as below:
3197  * ./mon_data:
3198  * mon_L3_00
3199  * mon_L3_01
3200  * mon_L3_02
3201  * ...
3202  *
3203  * Each domain directory has one file per event:
3204  * ./mon_L3_00/:
3205  * llc_occupancy
3206  *
3207  */
3208 static int mkdir_mondata_all(struct kernfs_node *parent_kn,
3209 			     struct rdtgroup *prgrp,
3210 			     struct kernfs_node **dest_kn)
3211 {
3212 	struct rdt_resource *r;
3213 	struct kernfs_node *kn;
3214 	int ret;
3215 
3216 	/*
3217 	 * Create the mon_data directory first.
3218 	 */
3219 	ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn);
3220 	if (ret)
3221 		return ret;
3222 
3223 	if (dest_kn)
3224 		*dest_kn = kn;
3225 
3226 	/*
3227 	 * Create the subdirectories for each domain. Note that all events
3228 	 * in a domain like L3 are grouped into a resource whose domain is L3.
3229 	 */
3230 	for_each_mon_capable_rdt_resource(r) {
3231 		ret = mkdir_mondata_subdir_alldom(kn, r, prgrp);
3232 		if (ret)
3233 			goto out_destroy;
3234 	}
3235 
3236 	return 0;
3237 
3238 out_destroy:
3239 	kernfs_remove(kn);
3240 	return ret;
3241 }
3242 
3243 /**
3244  * cbm_ensure_valid - Enforce validity on provided CBM
3245  * @_val:	Candidate CBM
3246  * @r:		RDT resource to which the CBM belongs
3247  *
3248  * The provided CBM represents all cache portions available for use. This
3249  * may be represented by a bitmap that does not consist of contiguous ones
3250  * and thus be an invalid CBM.
3251  * Here the provided CBM is forced to be a valid CBM by only considering
3252  * the first set of contiguous bits as valid and clearing all other bits.
3253  * The intention here is to provide a valid default CBM with which a new
3254  * resource group is initialized. The user can follow this with a
3255  * modification to the CBM if the default does not satisfy the
3256  * requirements.
3257  */
3258 static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
3259 {
3260 	unsigned int cbm_len = r->cache.cbm_len;
3261 	unsigned long first_bit, zero_bit;
3262 	unsigned long val = _val;
3263 
3264 	if (!val)
3265 		return 0;
3266 
3267 	first_bit = find_first_bit(&val, cbm_len);
3268 	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
3269 
3270 	/* Clear any remaining bits to ensure contiguous region */
3271 	bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
3272 	return (u32)val;
3273 }
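
/*
 * Worked example for cbm_ensure_valid(), with illustrative values and
 * cbm_len = 12: _val = 0x5b3 (0101 1011 0011b) has its first set bit at
 * position 0 and the first zero bit above that at position 2, so bits 2
 * and up are cleared and 0x3, the first contiguous run of ones, is
 * returned.
 */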
3274 
3275 /*
3276  * Initialize cache resources per RDT domain
3277  *
3278  * Set the RDT domain up to start off with all usable allocations. That is,
3279  * all shareable and unused bits. All-zero CBM is invalid.
3280  */
3281 static int __init_one_rdt_domain(struct rdt_ctrl_domain *d, struct resctrl_schema *s,
3282 				 u32 closid)
3283 {
3284 	enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
3285 	enum resctrl_conf_type t = s->conf_type;
3286 	struct resctrl_staged_config *cfg;
3287 	struct rdt_resource *r = s->res;
3288 	u32 used_b = 0, unused_b = 0;
3289 	unsigned long tmp_cbm;
3290 	enum rdtgrp_mode mode;
3291 	u32 peer_ctl, ctrl_val;
3292 	int i;
3293 
3294 	cfg = &d->staged_config[t];
3295 	cfg->have_new_ctrl = false;
3296 	cfg->new_ctrl = r->cache.shareable_bits;
3297 	used_b = r->cache.shareable_bits;
3298 	for (i = 0; i < closids_supported(); i++) {
3299 		if (closid_allocated(i) && i != closid) {
3300 			mode = rdtgroup_mode_by_closid(i);
3301 			if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
3302 				/*
3303 				 * ctrl values for locksetup aren't relevant
3304 				 * until the schemata is written, and the mode
3305 				 * becomes RDT_MODE_PSEUDO_LOCKED.
3306 				 */
3307 				continue;
3308 			/*
3309 			 * If CDP is active include peer domain's
3310 			 * usage to ensure there is no overlap
3311 			 * with an exclusive group.
3312 			 */
3313 			if (resctrl_arch_get_cdp_enabled(r->rid))
3314 				peer_ctl = resctrl_arch_get_config(r, d, i,
3315 								   peer_type);
3316 			else
3317 				peer_ctl = 0;
3318 			ctrl_val = resctrl_arch_get_config(r, d, i,
3319 							   s->conf_type);
3320 			used_b |= ctrl_val | peer_ctl;
3321 			if (mode == RDT_MODE_SHAREABLE)
3322 				cfg->new_ctrl |= ctrl_val | peer_ctl;
3323 		}
3324 	}
3325 	if (d->plr && d->plr->cbm > 0)
3326 		used_b |= d->plr->cbm;
3327 	unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
3328 	unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
3329 	cfg->new_ctrl |= unused_b;
3330 	/*
3331 	 * Force the initial CBM to be valid, user can
3332 	 * modify the CBM based on system availability.
3333 	 */
3334 	cfg->new_ctrl = cbm_ensure_valid(cfg->new_ctrl, r);
3335 	/*
3336 	 * Assign the u32 CBM to an unsigned long to ensure that
3337 	 * bitmap_weight() does not access out-of-bound memory.
3338 	 */
3339 	tmp_cbm = cfg->new_ctrl;
3340 	if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
3341 		rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->hdr.id);
3342 		return -ENOSPC;
3343 	}
3344 	cfg->have_new_ctrl = true;
3345 
3346 	return 0;
3347 }
3348 
3349 /*
3350  * Initialize cache resources with default values.
3351  *
3352  * A new RDT group is being created on an allocation capable (CAT)
3353  * supporting system. Set this group up to start off with all usable
3354  * allocations.
3355  *
3356  * If there are no more shareable bits available on any domain then
3357  * the entire allocation will fail.
3358  */
3359 static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
3360 {
3361 	struct rdt_ctrl_domain *d;
3362 	int ret;
3363 
3364 	list_for_each_entry(d, &s->res->ctrl_domains, hdr.list) {
3365 		ret = __init_one_rdt_domain(d, s, closid);
3366 		if (ret < 0)
3367 			return ret;
3368 	}
3369 
3370 	return 0;
3371 }
3372 
3373 /* Initialize MBA resource with default values. */
3374 static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid)
3375 {
3376 	struct resctrl_staged_config *cfg;
3377 	struct rdt_ctrl_domain *d;
3378 
3379 	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
3380 		if (is_mba_sc(r)) {
3381 			d->mbps_val[closid] = MBA_MAX_MBPS;
3382 			continue;
3383 		}
3384 
3385 		cfg = &d->staged_config[CDP_NONE];
3386 		cfg->new_ctrl = r->default_ctrl;
3387 		cfg->have_new_ctrl = true;
3388 	}
3389 }
3390 
3391 /* Initialize the RDT group's allocations. */
3392 static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
3393 {
3394 	struct resctrl_schema *s;
3395 	struct rdt_resource *r;
3396 	int ret = 0;
3397 
3398 	rdt_staged_configs_clear();
3399 
3400 	list_for_each_entry(s, &resctrl_schema_all, list) {
3401 		r = s->res;
3402 		if (r->rid == RDT_RESOURCE_MBA ||
3403 		    r->rid == RDT_RESOURCE_SMBA) {
3404 			rdtgroup_init_mba(r, rdtgrp->closid);
3405 			if (is_mba_sc(r))
3406 				continue;
3407 		} else {
3408 			ret = rdtgroup_init_cat(s, rdtgrp->closid);
3409 			if (ret < 0)
3410 				goto out;
3411 		}
3412 
3413 		ret = resctrl_arch_update_domains(r, rdtgrp->closid);
3414 		if (ret < 0) {
3415 			rdt_last_cmd_puts("Failed to initialize allocations\n");
3416 			goto out;
3417 		}
3418 
3419 	}
3420 
3421 	rdtgrp->mode = RDT_MODE_SHAREABLE;
3422 
3423 out:
3424 	rdt_staged_configs_clear();
3425 	return ret;
3426 }
3427 
3428 static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp)
3429 {
3430 	int ret;
3431 
3432 	if (!resctrl_arch_mon_capable())
3433 		return 0;
3434 
3435 	ret = alloc_rmid(rdtgrp->closid);
3436 	if (ret < 0) {
3437 		rdt_last_cmd_puts("Out of RMIDs\n");
3438 		return ret;
3439 	}
3440 	rdtgrp->mon.rmid = ret;
3441 
3442 	ret = mkdir_mondata_all(rdtgrp->kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
3443 	if (ret) {
3444 		rdt_last_cmd_puts("kernfs subdir error\n");
3445 		free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
3446 		return ret;
3447 	}
3448 
3449 	return 0;
3450 }
3451 
3452 static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp)
3453 {
3454 	if (resctrl_arch_mon_capable())
3455 		free_rmid(rgrp->closid, rgrp->mon.rmid);
3456 }
3457 
3458 static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
3459 			     const char *name, umode_t mode,
3460 			     enum rdt_group_type rtype, struct rdtgroup **r)
3461 {
3462 	struct rdtgroup *prdtgrp, *rdtgrp;
3463 	unsigned long files = 0;
3464 	struct kernfs_node *kn;
3465 	int ret;
3466 
3467 	prdtgrp = rdtgroup_kn_lock_live(parent_kn);
3468 	if (!prdtgrp) {
3469 		ret = -ENODEV;
3470 		goto out_unlock;
3471 	}
3472 
3473 	if (rtype == RDTMON_GROUP &&
3474 	    (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
3475 	     prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) {
3476 		ret = -EINVAL;
3477 		rdt_last_cmd_puts("Pseudo-locking in progress\n");
3478 		goto out_unlock;
3479 	}
3480 
3481 	/* allocate the rdtgroup. */
3482 	rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
3483 	if (!rdtgrp) {
3484 		ret = -ENOSPC;
3485 		rdt_last_cmd_puts("Kernel out of memory\n");
3486 		goto out_unlock;
3487 	}
3488 	*r = rdtgrp;
3489 	rdtgrp->mon.parent = prdtgrp;
3490 	rdtgrp->type = rtype;
3491 	INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list);
3492 
3493 	/* kernfs creates the directory for rdtgrp */
3494 	kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
3495 	if (IS_ERR(kn)) {
3496 		ret = PTR_ERR(kn);
3497 		rdt_last_cmd_puts("kernfs create error\n");
3498 		goto out_free_rgrp;
3499 	}
3500 	rdtgrp->kn = kn;
3501 
3502 	/*
3503 	 * kernfs_remove() will drop the reference count on "kn" which
3504 	 * will free it. But we still need it to stick around for the
3505 	 * rdtgroup_kn_unlock(kn) call. Take one extra reference here,
3506 	 * which will be dropped by kernfs_put() in rdtgroup_remove().
3507 	 */
3508 	kernfs_get(kn);
3509 
3510 	ret = rdtgroup_kn_set_ugid(kn);
3511 	if (ret) {
3512 		rdt_last_cmd_puts("kernfs perm error\n");
3513 		goto out_destroy;
3514 	}
3515 
3516 	if (rtype == RDTCTRL_GROUP) {
3517 		files = RFTYPE_BASE | RFTYPE_CTRL;
3518 		if (resctrl_arch_mon_capable())
3519 			files |= RFTYPE_MON;
3520 	} else {
3521 		files = RFTYPE_BASE | RFTYPE_MON;
3522 	}
3523 
3524 	ret = rdtgroup_add_files(kn, files);
3525 	if (ret) {
3526 		rdt_last_cmd_puts("kernfs fill error\n");
3527 		goto out_destroy;
3528 	}
3529 
3530 	/*
3531 	 * The caller unlocks the parent_kn upon success.
3532 	 */
3533 	return 0;
3534 
3535 out_destroy:
3536 	kernfs_put(rdtgrp->kn);
3537 	kernfs_remove(rdtgrp->kn);
3538 out_free_rgrp:
3539 	kfree(rdtgrp);
3540 out_unlock:
3541 	rdtgroup_kn_unlock(parent_kn);
3542 	return ret;
3543 }
3544 
3545 static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
3546 {
3547 	kernfs_remove(rgrp->kn);
3548 	rdtgroup_remove(rgrp);
3549 }
3550 
3551 /*
3552  * Create a monitor group under "mon_groups" directory of a control
3553  * and monitor group (ctrl_mon). This is a resource group
3554  * to monitor a subset of tasks and cpus in its parent ctrl_mon group.
3555  */
3556 static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
3557 			      const char *name, umode_t mode)
3558 {
3559 	struct rdtgroup *rdtgrp, *prgrp;
3560 	int ret;
3561 
3562 	ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTMON_GROUP, &rdtgrp);
3563 	if (ret)
3564 		return ret;
3565 
3566 	prgrp = rdtgrp->mon.parent;
3567 	rdtgrp->closid = prgrp->closid;
3568 
3569 	ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
3570 	if (ret) {
3571 		mkdir_rdt_prepare_clean(rdtgrp);
3572 		goto out_unlock;
3573 	}
3574 
3575 	kernfs_activate(rdtgrp->kn);
3576 
3577 	/*
3578 	 * Add the rdtgrp to the list of rdtgrps the parent
3579 	 * ctrl_mon group has to track.
3580 	 */
3581 	list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
3582 
3583 out_unlock:
3584 	rdtgroup_kn_unlock(parent_kn);
3585 	return ret;
3586 }
3587 
3588 /*
3589  * These are rdtgroups created under the root directory. Can be used
3590  * to allocate and monitor resources.
3591  */
3592 static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
3593 				   const char *name, umode_t mode)
3594 {
3595 	struct rdtgroup *rdtgrp;
3596 	struct kernfs_node *kn;
3597 	u32 closid;
3598 	int ret;
3599 
3600 	ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTCTRL_GROUP, &rdtgrp);
3601 	if (ret)
3602 		return ret;
3603 
3604 	kn = rdtgrp->kn;
3605 	ret = closid_alloc();
3606 	if (ret < 0) {
3607 		rdt_last_cmd_puts("Out of CLOSIDs\n");
3608 		goto out_common_fail;
3609 	}
3610 	closid = ret;
3611 	ret = 0;
3612 
3613 	rdtgrp->closid = closid;
3614 
3615 	ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
3616 	if (ret)
3617 		goto out_closid_free;
3618 
3619 	kernfs_activate(rdtgrp->kn);
3620 
3621 	ret = rdtgroup_init_alloc(rdtgrp);
3622 	if (ret < 0)
3623 		goto out_rmid_free;
3624 
3625 	list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
3626 
3627 	if (resctrl_arch_mon_capable()) {
3628 		/*
3629 		 * Create an empty mon_groups directory to hold the subset
3630 		 * of tasks and cpus to monitor.
3631 		 */
3632 		ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL);
3633 		if (ret) {
3634 			rdt_last_cmd_puts("kernfs subdir error\n");
3635 			goto out_del_list;
3636 		}
3637 		if (is_mba_sc(NULL))
3638 			rdtgrp->mba_mbps_event = mba_mbps_default_event;
3639 	}
3640 
3641 	goto out_unlock;
3642 
3643 out_del_list:
3644 	list_del(&rdtgrp->rdtgroup_list);
3645 out_rmid_free:
3646 	mkdir_rdt_prepare_rmid_free(rdtgrp);
3647 out_closid_free:
3648 	closid_free(closid);
3649 out_common_fail:
3650 	mkdir_rdt_prepare_clean(rdtgrp);
3651 out_unlock:
3652 	rdtgroup_kn_unlock(parent_kn);
3653 	return ret;
3654 }
3655 
3656 /*
3657  * We allow creating mon groups only within a directory called "mon_groups"
3658  * which is present in every ctrl_mon group. Check if this is a valid
3659  * "mon_groups" directory.
3660  *
3661  * 1. The directory should be named "mon_groups".
3662  * 2. The mon group itself should "not" be named "mon_groups".
3663  *   This makes sure the "mon_groups" directory always has a ctrl_mon
3664  *   group as its parent.
3665  */
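/*
 * For example, "mkdir .../g1/mon_groups/m1" passes this check (kn->name is
 * "mon_groups", name is "m1"), while "mkdir .../g1/mon_groups/mon_groups"
 * does not.
 */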
3666 static bool is_mon_groups(struct kernfs_node *kn, const char *name)
3667 {
3668 	return (!strcmp(kn->name, "mon_groups") &&
3669 		strcmp(name, "mon_groups"));
3670 }
3671 
3672 static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
3673 			  umode_t mode)
3674 {
3675 	/* Do not accept '\n' in a group name: it would make listings unparsable. */
3676 	if (strchr(name, '\n'))
3677 		return -EINVAL;
3678 
3679 	/*
3680 	 * If the parent directory is the root directory and RDT
3681 	 * allocation is supported, add a control and monitoring
3682 	 * subdirectory.
3683 	 */
3684 	if (resctrl_arch_alloc_capable() && parent_kn == rdtgroup_default.kn)
3685 		return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode);
3686 
3687 	/*
3688 	 * If RDT monitoring is supported and the parent directory is a valid
3689 	 * "mon_groups" directory, add a monitoring subdirectory.
3690 	 */
3691 	if (resctrl_arch_mon_capable() && is_mon_groups(parent_kn, name))
3692 		return rdtgroup_mkdir_mon(parent_kn, name, mode);
3693 
3694 	return -EPERM;
3695 }
3696 
3697 static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
3698 {
3699 	struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
3700 	int cpu;
3701 
3702 	/* Give any tasks back to the parent group */
3703 	rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask);
3704 
3705 	/* Update per cpu rmid of the moved CPUs first */
3706 	for_each_cpu(cpu, &rdtgrp->cpu_mask)
3707 		per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid;
3708 	/*
3709 	 * Update the MSR on the moved CPUs and on any CPU that is
3710 	 * running a moved task.
3711 	 */
3712 	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
3713 	update_closid_rmid(tmpmask, NULL);
3714 
3715 	rdtgrp->flags = RDT_DELETED;
3716 	free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
3717 
3718 	/*
3719 	 * Remove the rdtgrp from the parent ctrl_mon group's list
3720 	 */
3721 	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
3722 	list_del(&rdtgrp->mon.crdtgrp_list);
3723 
3724 	kernfs_remove(rdtgrp->kn);
3725 
3726 	return 0;
3727 }
3728 
3729 static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp)
3730 {
3731 	rdtgrp->flags = RDT_DELETED;
3732 	list_del(&rdtgrp->rdtgroup_list);
3733 
3734 	kernfs_remove(rdtgrp->kn);
3735 	return 0;
3736 }
3737 
3738 static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
3739 {
3740 	int cpu;
3741 
3742 	/* Give any tasks back to the default group */
3743 	rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);
3744 
3745 	/* Give any CPUs back to the default group */
3746 	cpumask_or(&rdtgroup_default.cpu_mask,
3747 		   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
3748 
3749 	/* Update per cpu closid and rmid of the moved CPUs first */
3750 	for_each_cpu(cpu, &rdtgrp->cpu_mask) {
3751 		per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid;
3752 		per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid;
3753 	}
3754 
3755 	/*
3756 	 * Update the MSR on the moved CPUs and on any CPU that is
3757 	 * running a moved task.
3758 	 */
3759 	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
3760 	update_closid_rmid(tmpmask, NULL);
3761 
3762 	free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
3763 	closid_free(rdtgrp->closid);
3764 
3765 	rdtgroup_ctrl_remove(rdtgrp);
3766 
3767 	/*
3768 	 * Free all the child monitor group rmids.
3769 	 */
3770 	free_all_child_rdtgrp(rdtgrp);
3771 
3772 	return 0;
3773 }
3774 
3775 static int rdtgroup_rmdir(struct kernfs_node *kn)
3776 {
3777 	struct kernfs_node *parent_kn = kn->parent;
3778 	struct rdtgroup *rdtgrp;
3779 	cpumask_var_t tmpmask;
3780 	int ret = 0;
3781 
3782 	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
3783 		return -ENOMEM;
3784 
3785 	rdtgrp = rdtgroup_kn_lock_live(kn);
3786 	if (!rdtgrp) {
3787 		ret = -EPERM;
3788 		goto out;
3789 	}
3790 
3791 	/*
3792 	 * If the rdtgroup is a ctrl_mon group and parent directory
3793 	 * is the root directory, remove the ctrl_mon group.
3794 	 *
3795 	 * If the rdtgroup is a mon group and parent directory
3796 	 * is a valid "mon_groups" directory, remove the mon group.
3797 	 */
3798 	if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn &&
3799 	    rdtgrp != &rdtgroup_default) {
3800 		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
3801 		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
3802 			ret = rdtgroup_ctrl_remove(rdtgrp);
3803 		} else {
3804 			ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask);
3805 		}
3806 	} else if (rdtgrp->type == RDTMON_GROUP &&
3807 		 is_mon_groups(parent_kn, kn->name)) {
3808 		ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask);
3809 	} else {
3810 		ret = -EPERM;
3811 	}
3812 
3813 out:
3814 	rdtgroup_kn_unlock(kn);
3815 	free_cpumask_var(tmpmask);
3816 	return ret;
3817 }
3818 
3819 /**
3820  * mongrp_reparent() - replace parent CTRL_MON group of a MON group
3821  * @rdtgrp:		the MON group whose parent should be replaced
3822  * @new_prdtgrp:	replacement parent CTRL_MON group for @rdtgrp
3823  * @cpus:		cpumask provided by the caller for use during this call
3824  *
3825  * Replaces the parent CTRL_MON group for a MON group, resulting in all member
3826  * tasks' CLOSID immediately changing to that of the new parent group.
3827  * Monitoring data for the group is unaffected by this operation.
3828  */
3829 static void mongrp_reparent(struct rdtgroup *rdtgrp,
3830 			    struct rdtgroup *new_prdtgrp,
3831 			    cpumask_var_t cpus)
3832 {
3833 	struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
3834 
3835 	WARN_ON(rdtgrp->type != RDTMON_GROUP);
3836 	WARN_ON(new_prdtgrp->type != RDTCTRL_GROUP);
3837 
3838 	/* Nothing to do when simply renaming a MON group. */
3839 	if (prdtgrp == new_prdtgrp)
3840 		return;
3841 
3842 	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
3843 	list_move_tail(&rdtgrp->mon.crdtgrp_list,
3844 		       &new_prdtgrp->mon.crdtgrp_list);
3845 
3846 	rdtgrp->mon.parent = new_prdtgrp;
3847 	rdtgrp->closid = new_prdtgrp->closid;
3848 
3849 	/* Propagate updated closid to all tasks in this group. */
3850 	rdt_move_group_tasks(rdtgrp, rdtgrp, cpus);
3851 
3852 	update_closid_rmid(cpus, NULL);
3853 }
3854 
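/*
 * rename(2) on a MON group directory moves it to a different parent CTRL_MON
 * group, for example (illustrative names):
 *   mv /sys/fs/resctrl/g1/mon_groups/m1 /sys/fs/resctrl/g2/mon_groups/
 * Only MON groups may be moved, and only into a "mon_groups" directory; the
 * checks below reject everything else.
 */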
3855 static int rdtgroup_rename(struct kernfs_node *kn,
3856 			   struct kernfs_node *new_parent, const char *new_name)
3857 {
3858 	struct rdtgroup *new_prdtgrp;
3859 	struct rdtgroup *rdtgrp;
3860 	cpumask_var_t tmpmask;
3861 	int ret;
3862 
3863 	rdtgrp = kernfs_to_rdtgroup(kn);
3864 	new_prdtgrp = kernfs_to_rdtgroup(new_parent);
3865 	if (!rdtgrp || !new_prdtgrp)
3866 		return -ENOENT;
3867 
3868 	/* Release both kernfs active_refs before obtaining rdtgroup mutex. */
3869 	rdtgroup_kn_get(rdtgrp, kn);
3870 	rdtgroup_kn_get(new_prdtgrp, new_parent);
3871 
3872 	mutex_lock(&rdtgroup_mutex);
3873 
3874 	rdt_last_cmd_clear();
3875 
3876 	/*
3877 	 * Don't allow kernfs_to_rdtgroup() to return a parent rdtgroup if
3878 	 * either kernfs_node is a file.
3879 	 */
3880 	if (kernfs_type(kn) != KERNFS_DIR ||
3881 	    kernfs_type(new_parent) != KERNFS_DIR) {
3882 		rdt_last_cmd_puts("Source and destination must be directories\n");
3883 		ret = -EPERM;
3884 		goto out;
3885 	}
3886 
3887 	if ((rdtgrp->flags & RDT_DELETED) || (new_prdtgrp->flags & RDT_DELETED)) {
3888 		ret = -ENOENT;
3889 		goto out;
3890 	}
3891 
3892 	if (rdtgrp->type != RDTMON_GROUP || !kn->parent ||
3893 	    !is_mon_groups(kn->parent, kn->name)) {
3894 		rdt_last_cmd_puts("Source must be a MON group\n");
3895 		ret = -EPERM;
3896 		goto out;
3897 	}
3898 
3899 	if (!is_mon_groups(new_parent, new_name)) {
3900 		rdt_last_cmd_puts("Destination must be a mon_groups subdirectory\n");
3901 		ret = -EPERM;
3902 		goto out;
3903 	}
3904 
3905 	/*
3906 	 * If the MON group is monitoring CPUs, the CPUs must be assigned to the
3907 	 * current parent CTRL_MON group and therefore cannot be assigned to
3908 	 * the new parent, making the move illegal.
3909 	 */
3910 	if (!cpumask_empty(&rdtgrp->cpu_mask) &&
3911 	    rdtgrp->mon.parent != new_prdtgrp) {
3912 		rdt_last_cmd_puts("Cannot move a MON group that monitors CPUs\n");
3913 		ret = -EPERM;
3914 		goto out;
3915 	}
3916 
3917 	/*
3918 	 * Allocate the cpumask for use in mongrp_reparent() to avoid the
3919 	 * possibility of failing to allocate it after kernfs_rename() has
3920 	 * succeeded.
3921 	 */
3922 	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) {
3923 		ret = -ENOMEM;
3924 		goto out;
3925 	}
3926 
3927 	/*
3928 	 * Perform all input validation and allocations needed to ensure
3929 	 * mongrp_reparent() will succeed before calling kernfs_rename(),
3930 	 * otherwise it would be necessary to revert this call if
3931 	 * mongrp_reparent() failed.
3932 	 */
3933 	ret = kernfs_rename(kn, new_parent, new_name);
3934 	if (!ret)
3935 		mongrp_reparent(rdtgrp, new_prdtgrp, tmpmask);
3936 
3937 	free_cpumask_var(tmpmask);
3938 
3939 out:
3940 	mutex_unlock(&rdtgroup_mutex);
3941 	rdtgroup_kn_put(rdtgrp, kn);
3942 	rdtgroup_kn_put(new_prdtgrp, new_parent);
3943 	return ret;
3944 }
3945 
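/*
 * Emit the currently active mount options, e.g. as shown after the mount
 * point in /proc/mounts.
 */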
3946 static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
3947 {
3948 	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
3949 		seq_puts(seq, ",cdp");
3950 
3951 	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
3952 		seq_puts(seq, ",cdpl2");
3953 
3954 	if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl))
3955 		seq_puts(seq, ",mba_MBps");
3956 
3957 	if (resctrl_debug)
3958 		seq_puts(seq, ",debug");
3959 
3960 	return 0;
3961 }
3962 
3963 static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
3964 	.mkdir		= rdtgroup_mkdir,
3965 	.rmdir		= rdtgroup_rmdir,
3966 	.rename		= rdtgroup_rename,
3967 	.show_options	= rdtgroup_show_options,
3968 };
3969 
3970 static int rdtgroup_setup_root(struct rdt_fs_context *ctx)
3971 {
3972 	rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
3973 				      KERNFS_ROOT_CREATE_DEACTIVATED |
3974 				      KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
3975 				      &rdtgroup_default);
3976 	if (IS_ERR(rdt_root))
3977 		return PTR_ERR(rdt_root);
3978 
3979 	ctx->kfc.root = rdt_root;
3980 	rdtgroup_default.kn = kernfs_root_to_node(rdt_root);
3981 
3982 	return 0;
3983 }
3984 
3985 static void rdtgroup_destroy_root(void)
3986 {
3987 	kernfs_destroy_root(rdt_root);
3988 	rdtgroup_default.kn = NULL;
3989 }
3990 
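/*
 * Initialize the default resource group. It uses the reserved CLOSID and
 * RMID and holds every task and CPU that has not been moved into another
 * group.
 */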
3991 static void __init rdtgroup_setup_default(void)
3992 {
3993 	mutex_lock(&rdtgroup_mutex);
3994 
3995 	rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID;
3996 	rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID;
3997 	rdtgroup_default.type = RDTCTRL_GROUP;
3998 	INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);
3999 
4000 	list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
4001 
4002 	mutex_unlock(&rdtgroup_mutex);
4003 }
4004 
4005 static void domain_destroy_mon_state(struct rdt_mon_domain *d)
4006 {
4007 	bitmap_free(d->rmid_busy_llc);
4008 	kfree(d->mbm_total);
4009 	kfree(d->mbm_local);
4010 }
4011 
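/*
 * A control domain is going offline: free any per-domain state used by the
 * MBA software controller ("mba_MBps") if that option is supported.
 */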
4012 void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
4013 {
4014 	mutex_lock(&rdtgroup_mutex);
4015 
4016 	if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
4017 		mba_sc_domain_destroy(r, d);
4018 
4019 	mutex_unlock(&rdtgroup_mutex);
4020 }
4021 
4022 void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
4023 {
4024 	mutex_lock(&rdtgroup_mutex);
4025 
4026 	/*
4027 	 * If resctrl is mounted, remove all the
4028 	 * per domain monitor data directories.
4029 	 */
4030 	if (resctrl_mounted && resctrl_arch_mon_capable())
4031 		rmdir_mondata_subdir_allrdtgrp(r, d);
4032 
4033 	if (is_mbm_enabled())
4034 		cancel_delayed_work(&d->mbm_over);
4035 	if (is_llc_occupancy_enabled() && has_busy_rmid(d)) {
4036 		/*
4037 		 * When a package is going down, forcefully
4038 		 * decrement rmid->ebusy. There is no way to know
4039 		 * that the L3 was flushed and hence may lead to
4040 		 * incorrect counts in rare scenarios, but leaving
4041 		 * the RMID as busy creates RMID leaks if the
4042 		 * package never comes back.
4043 		 */
4044 		__check_limbo(d, true);
4045 		cancel_delayed_work(&d->cqm_limbo);
4046 	}
4047 
4048 	domain_destroy_mon_state(d);
4049 
4050 	mutex_unlock(&rdtgroup_mutex);
4051 }
4052 
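/*
 * Allocate the per-domain monitoring state: the limbo bitmap tracking RMIDs
 * whose LLC occupancy has not yet dropped, and the MBM total/local counter
 * arrays. All are sized by the number of RMID indexes the system supports.
 */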
4053 static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d)
4054 {
4055 	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
4056 	size_t tsize;
4057 
4058 	if (is_llc_occupancy_enabled()) {
4059 		d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL);
4060 		if (!d->rmid_busy_llc)
4061 			return -ENOMEM;
4062 	}
4063 	if (is_mbm_total_enabled()) {
4064 		tsize = sizeof(*d->mbm_total);
4065 		d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL);
4066 		if (!d->mbm_total) {
4067 			bitmap_free(d->rmid_busy_llc);
4068 			return -ENOMEM;
4069 		}
4070 	}
4071 	if (is_mbm_local_enabled()) {
4072 		tsize = sizeof(*d->mbm_local);
4073 		d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL);
4074 		if (!d->mbm_local) {
4075 			bitmap_free(d->rmid_busy_llc);
4076 			kfree(d->mbm_total);
4077 			return -ENOMEM;
4078 		}
4079 	}
4080 
4081 	return 0;
4082 }
4083 
4084 int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
4085 {
4086 	int err = 0;
4087 
4088 	mutex_lock(&rdtgroup_mutex);
4089 
4090 	if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) {
4091 		/* RDT_RESOURCE_MBA is never mon_capable */
4092 		err = mba_sc_domain_allocate(r, d);
4093 	}
4094 
4095 	mutex_unlock(&rdtgroup_mutex);
4096 
4097 	return err;
4098 }
4099 
4100 int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
4101 {
4102 	int err;
4103 
4104 	mutex_lock(&rdtgroup_mutex);
4105 
4106 	err = domain_setup_mon_state(r, d);
4107 	if (err)
4108 		goto out_unlock;
4109 
4110 	if (is_mbm_enabled()) {
4111 		INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
4112 		mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL,
4113 					   RESCTRL_PICK_ANY_CPU);
4114 	}
4115 
4116 	if (is_llc_occupancy_enabled())
4117 		INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
4118 
4119 	/*
4120 	 * If the filesystem is not mounted then only the default resource group
4121 	 * exists. Creation of its directories is deferred until mount time
4122 	 * by rdt_get_tree() calling mkdir_mondata_all().
4123 	 * If resctrl is mounted, add per domain monitor data directories.
4124 	 */
4125 	if (resctrl_mounted && resctrl_arch_mon_capable())
4126 		mkdir_mondata_subdir_allrdtgrp(r, d);
4127 
4128 out_unlock:
4129 	mutex_unlock(&rdtgroup_mutex);
4130 
4131 	return err;
4132 }
4133 
4134 void resctrl_online_cpu(unsigned int cpu)
4135 {
4136 	mutex_lock(&rdtgroup_mutex);
4137 	/* A newly onlined CPU is placed in the default rdtgroup. */
4138 	cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
4139 	mutex_unlock(&rdtgroup_mutex);
4140 }
4141 
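/*
 * A CPU is expected to be in at most one child MON group of @r, so the scan
 * stops as soon as the CPU is found and cleared.
 */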
4142 static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
4143 {
4144 	struct rdtgroup *cr;
4145 
4146 	list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) {
4147 		if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask))
4148 			break;
4149 	}
4150 }
4151 
4152 void resctrl_offline_cpu(unsigned int cpu)
4153 {
4154 	struct rdt_resource *l3 = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
4155 	struct rdt_mon_domain *d;
4156 	struct rdtgroup *rdtgrp;
4157 
4158 	mutex_lock(&rdtgroup_mutex);
4159 	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
4160 		if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
4161 			clear_childcpus(rdtgrp, cpu);
4162 			break;
4163 		}
4164 	}
4165 
4166 	if (!l3->mon_capable)
4167 		goto out_unlock;
4168 
4169 	d = get_mon_domain_from_cpu(cpu, l3);
4170 	if (d) {
4171 		if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
4172 			cancel_delayed_work(&d->mbm_over);
4173 			mbm_setup_overflow_handler(d, 0, cpu);
4174 		}
4175 		if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
4176 		    has_busy_rmid(d)) {
4177 			cancel_delayed_work(&d->cqm_limbo);
4178 			cqm_setup_limbo_handler(d, 0, cpu);
4179 		}
4180 	}
4181 
4182 out_unlock:
4183 	mutex_unlock(&rdtgroup_mutex);
4184 }
4185 
4186 /*
4187  * rdtgroup_init - rdtgroup initialization
4188  *
4189  * Set up the resctrl filesystem: set up the root, create the mount point,
4190  * register the rdtgroup filesystem, and initialize files under the root directory.
4191  *
4192  * Return: 0 on success or -errno
4193  */
4194 int __init rdtgroup_init(void)
4195 {
4196 	int ret = 0;
4197 
4198 	seq_buf_init(&last_cmd_status, last_cmd_status_buf,
4199 		     sizeof(last_cmd_status_buf));
4200 
4201 	rdtgroup_setup_default();
4202 
4203 	ret = sysfs_create_mount_point(fs_kobj, "resctrl");
4204 	if (ret)
4205 		return ret;
4206 
4207 	ret = register_filesystem(&rdt_fs_type);
4208 	if (ret)
4209 		goto cleanup_mountpoint;
4210 
4211 	/*
4212 	 * Adding the resctrl debugfs directory here may not be ideal since
4213 	 * it would let the resctrl debugfs directory appear on the debugfs
4214 	 * filesystem before the resctrl filesystem is mounted.
4215 	 * It may also be fine, since it enables debugging of RDT before
4216 	 * resctrl is mounted.
4217 	 * The debugfs directory is created here rather than in
4218 	 * rdt_get_tree() because rdt_get_tree() takes rdtgroup_mutex and,
4219 	 * during the debugfs directory creation, also &sb->s_type->i_mutex_key
4220 	 * (the lockdep class of inode->i_rwsem). Other filesystem
4221 	 * interactions (e.g. SyS_getdents) have the lock ordering:
4222 	 * &sb->s_type->i_mutex_key --> &mm->mmap_lock
4223 	 * During mmap(), called with &mm->mmap_lock held, rdtgroup_mutex
4224 	 * is taken, thus creating the dependency:
4225 	 * &mm->mmap_lock --> rdtgroup_mutex
4226 	 * which can cause deadlock when combined with the other two
4227 	 * lock dependencies.
4228 	 * By creating the debugfs directory here we avoid that dependency,
4229 	 * even though file operations cannot occur until the filesystem
4230 	 * is mounted; there is no way to tell lockdep that.
4231 	 */
4232 	debugfs_resctrl = debugfs_create_dir("resctrl", NULL);
4233 
4234 	return 0;
4235 
4236 cleanup_mountpoint:
4237 	sysfs_remove_mount_point(fs_kobj, "resctrl");
4238 
4239 	return ret;
4240 }
4241 
4242 void __exit rdtgroup_exit(void)
4243 {
4244 	debugfs_remove_recursive(debugfs_resctrl);
4245 	unregister_filesystem(&rdt_fs_type);
4246 	sysfs_remove_mount_point(fs_kobj, "resctrl");
4247 }
4248