Lines Matching +full:next +full:- +full:mode
1 // SPDX-License-Identifier: GPL-2.0-only
18 #include <asm/nospec-branch.h>
38 * TLB flushing, formerly SMP-only
69 * Instead we have a small per-cpu array of ASIDs and cache the last few mm's
76 * ASID - [0, TLB_NR_DYN_ASIDS-1]
79 * kPCID - [1, TLB_NR_DYN_ASIDS]
83 * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS]
100 #define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS)
103  * ASIDs are zero-based: 0->MAX_ASID_AVAILABLE are valid. -1 below to account
104 * for them being zero-based. Another -1 is because PCID 0 is reserved for
105 * use by non-PCID-aware users.
107 #define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)
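The numbering above (ASID 0..TLB_NR_DYN_ASIDS-1, kPCID = ASID + 1, uPCID = kPCID + 2048) is easy to model outside the kernel. A minimal user-space sketch of that arithmetic, with TLB_NR_DYN_ASIDS and the PTI bit position assumed from the quoted ranges rather than copied from kernel headers:

	#include <assert.h>
	#include <stdio.h>

	/*
	 * Sketch only: TLB_NR_DYN_ASIDS and the PTI PCID bit are assumed
	 * from the ranges quoted above, not taken from the kernel headers.
	 */
	#define TLB_NR_DYN_ASIDS	6
	#define PTI_USER_PCID_MASK	(1u << 11)	/* 2048, per the uPCID range */

	/*
	 * Kernel PCID for a dynamic ASID: shifted up by one so that PCID 0
	 * stays reserved for non-PCID-aware users.
	 */
	static unsigned int kern_pcid(unsigned int asid)
	{
		assert(asid < TLB_NR_DYN_ASIDS);
		return asid + 1;
	}

	/*
	 * User PCID: the same dynamic ASID with the PTI bit set, so the
	 * kernel and user halves of one mm never share a PCID.
	 */
	static unsigned int user_pcid(unsigned int asid)
	{
		return kern_pcid(asid) | PTI_USER_PCID_MASK;
	}

	int main(void)
	{
		for (unsigned int asid = 0; asid < TLB_NR_DYN_ASIDS; asid++)
			printf("ASID %u -> kPCID %u, uPCID %u\n",
			       asid, kern_pcid(asid), user_pcid(asid));
		return 0;
	}

Keeping PCID 0 out of the dynamic range is what the kern_pcid() comments below are getting at: PCID-unaware code that saves and restores CR3 with PCID 0 can then never clobber a live ASID's translations.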
130 * The dynamically-assigned ASIDs that get passed in are small in kern_pcid()
134 * If PCID is on, ASID-aware code paths put the ASID+1 into the in kern_pcid()
136 * situation in which PCID-unaware code saves CR3, loads some other in kern_pcid()
139 * that any bugs involving loading a PCID-enabled CR3 with in kern_pcid()
206 * Make sure the next time we go to switch to in clear_asid_other()
217 static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen, in choose_new_asid() argument
233 next->context.ctx_id) in choose_new_asid()
246 *new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1; in choose_new_asid()
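choose_new_asid() above boils down to: reuse the per-CPU slot that already holds this mm's ctx_id (and flush only if its cached generation is stale), otherwise grab the next slot round-robin and force a flush. A simplified, self-contained model of that selection, with the kernel's per-CPU accessors replaced by a plain array and the wrap-around simplified:

	#include <stdbool.h>
	#include <stdint.h>

	#define NR_DYN_ASIDS	6

	struct asid_slot {
		uint64_t ctx_id;	/* which mm last owned this ASID slot */
		uint64_t tlb_gen;	/* TLB generation cached for that mm  */
	};

	static struct asid_slot slots[NR_DYN_ASIDS];
	static unsigned int next_asid;

	/*
	 * Pick an ASID for the mm identified by ctx_id, whose current TLB
	 * generation is next_tlb_gen.
	 */
	static void choose_asid(uint64_t ctx_id, uint64_t next_tlb_gen,
				unsigned int *new_asid, bool *need_flush)
	{
		for (unsigned int asid = 0; asid < NR_DYN_ASIDS; asid++) {
			if (slots[asid].ctx_id != ctx_id)
				continue;
			*new_asid = asid;
			*need_flush = slots[asid].tlb_gen < next_tlb_gen;
			return;
		}

		/* No slot belongs to this mm: steal one, which needs a flush. */
		*new_asid = next_asid++ % NR_DYN_ASIDS;
		*need_flush = true;
	}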
256 * until the next time we switch to it.
305 * It's plausible that we're in lazy TLB mode while our mm is init_mm. in leave_mm()
322 void switch_mm(struct mm_struct *prev, struct mm_struct *next, in switch_mm() argument
328 switch_mm_irqs_off(NULL, next, tsk); in switch_mm()
333 * Invoked from return to user/guest by a task that opted-in to L1D
344 struct task_struct *next) in l1d_flush_evaluate() argument
361 clear_ti_thread_flag(&next->thread_info, TIF_SPEC_L1D_FLUSH); in l1d_flush_evaluate()
362 next->l1d_flush_kill.func = l1d_flush_force_sigbus; in l1d_flush_evaluate()
363 task_work_add(next, &next->l1d_flush_kill, TWA_RESUME); in l1d_flush_evaluate()
367 static unsigned long mm_mangle_tif_spec_bits(struct task_struct *next) in mm_mangle_tif_spec_bits() argument
369 unsigned long next_tif = read_task_thread_flags(next); in mm_mangle_tif_spec_bits()
378 return (unsigned long)next->mm | spec_bits; in mm_mangle_tif_spec_bits()
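mm_mangle_tif_spec_bits() packs the task's speculation-control TIF bits into the low bits of its mm pointer, so a single compare in cond_mitigation() can tell whether the mm or the mitigation requirements changed. A hedged sketch of that packing; the struct and flag fields here are illustrative stand-ins, not the kernel's types:

	#include <stdbool.h>

	/*
	 * Illustrative bit values; the real LAST_USER_MM_* bits are derived
	 * from the task's TIF_SPEC_* flags in this file.
	 */
	#define LAST_USER_MM_IBPB	0x1UL
	#define LAST_USER_MM_L1D_FLUSH	0x2UL
	#define LAST_USER_MM_SPEC_MASK	(LAST_USER_MM_IBPB | LAST_USER_MM_L1D_FLUSH)

	struct task {
		void *mm;
		bool spec_ib;		/* task wants IBPB on context switch */
		bool spec_l1d_flush;	/* task wants an L1D flush on return */
	};

	/*
	 * Fold the speculation-control flags into the low bits of the mm
	 * pointer (zero anyway, since mm_struct is well aligned), so one
	 * compare later covers both "mm changed" and "mitigations changed".
	 */
	static unsigned long mangle_spec_bits(const struct task *next)
	{
		unsigned long spec_bits = 0;

		if (next->spec_ib)
			spec_bits |= LAST_USER_MM_IBPB;
		if (next->spec_l1d_flush)
			spec_bits |= LAST_USER_MM_L1D_FLUSH;

		return (unsigned long)next->mm | spec_bits;
	}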
381 static void cond_mitigation(struct task_struct *next) in cond_mitigation() argument
385 if (!next || !next->mm) in cond_mitigation()
388 next_mm = mm_mangle_tif_spec_bits(next); in cond_mitigation()
394 * doing Spectre-v2 attacks on another. in cond_mitigation()
396  * Both the conditional and the always IBPB modes use the mm in cond_mitigation()
398 * same process. Using the mm pointer instead of mm->context.ctx_id in cond_mitigation()
406 * This is a bit more complex than the always mode because in cond_mitigation()
421 * - the same user space task is scheduled out and later in cond_mitigation()
425 * - a user space task belonging to the same process is in cond_mitigation()
428 * - a user space task belonging to the same process is in cond_mitigation()
451 (unsigned long)next->mm) in cond_mitigation()
462 l1d_flush_evaluate(prev_mm, next_mm, next); in cond_mitigation()
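The comments above distinguish the conditional IBPB mode (barrier only when the outgoing task requested it and the incoming task belongs to a different process) from the always mode (barrier on every switch to a different user mm). A sketch of both checks on the mangled prev/next values, reusing the illustrative constants from the previous sketch:

	#include <stdbool.h>

	#define LAST_USER_MM_IBPB	0x1UL
	#define LAST_USER_MM_SPEC_MASK	0x3UL

	/*
	 * Conditional mode: barrier only when the outgoing task asked for
	 * IBPB (bit recorded in prev_mm) and the incoming task is from a
	 * different process (the mm pointer bits differ).
	 */
	static bool need_ibpb_cond(unsigned long prev_mm, unsigned long next_mm)
	{
		return (prev_mm & LAST_USER_MM_IBPB) &&
		       (prev_mm & ~LAST_USER_MM_SPEC_MASK) !=
		       (next_mm & ~LAST_USER_MM_SPEC_MASK);
	}

	/*
	 * Always mode: barrier on every switch to a different user mm,
	 * matching the bare "(unsigned long)next->mm" comparison quoted above.
	 */
	static bool need_ibpb_always(unsigned long prev_mm, unsigned long next_mm)
	{
		return (prev_mm & ~LAST_USER_MM_SPEC_MASK) !=
		       (next_mm & ~LAST_USER_MM_SPEC_MASK);
	}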
473 atomic_read(&mm->context.perf_rdpmc_allowed))) { in cr4_update_pce_mm()
497 * 'current->active_mm' up to date.
499 void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, in switch_mm_irqs_off() argument
525 if (WARN_ON_ONCE(__read_cr3() != build_cr3(prev->pgd, prev_asid, in switch_mm_irqs_off()
547 * core serialization before returning to user-space, after in switch_mm_irqs_off()
548 * storing to rq->curr, when changing mm. This is because in switch_mm_irqs_off()
557 if (prev == next) { in switch_mm_irqs_off()
560 next->context.ctx_id); in switch_mm_irqs_off()
568 * Even in lazy TLB mode, the CPU should stay set in the in switch_mm_irqs_off()
573 !cpumask_test_cpu(cpu, mm_cpumask(next)))) in switch_mm_irqs_off()
574 cpumask_set_cpu(cpu, mm_cpumask(next)); in switch_mm_irqs_off()
577 * If the CPU is not in lazy TLB mode, we are just switching in switch_mm_irqs_off()
591 next_tlb_gen = atomic64_read(&next->context.tlb_gen); in switch_mm_irqs_off()
598 * mode. Fall through to the TLB switching code below. in switch_mm_irqs_off()
618 if (next != &init_mm && !cpumask_test_cpu(cpu, mm_cpumask(next))) in switch_mm_irqs_off()
619 cpumask_set_cpu(cpu, mm_cpumask(next)); in switch_mm_irqs_off()
620 next_tlb_gen = atomic64_read(&next->context.tlb_gen); in switch_mm_irqs_off()
622 choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush); in switch_mm_irqs_off()
629 new_lam = mm_lam_cr3_mask(next); in switch_mm_irqs_off()
631 this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); in switch_mm_irqs_off()
633 load_new_mm_cr3(next->pgd, new_asid, new_lam, true); in switch_mm_irqs_off()
638 load_new_mm_cr3(next->pgd, new_asid, new_lam, false); in switch_mm_irqs_off()
646 this_cpu_write(cpu_tlbstate.loaded_mm, next); in switch_mm_irqs_off()
648 cpu_tlbstate_update_lam(new_lam, mm_untag_mask(next)); in switch_mm_irqs_off()
650 if (next != prev) { in switch_mm_irqs_off()
651 cr4_update_pce_mm(next); in switch_mm_irqs_off()
652 switch_ldt(prev, next); in switch_mm_irqs_off()
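The two load_new_mm_cr3() calls above differ only in whether the cached ASID is still valid: when it is, CR3 can be written with the architectural no-flush hint so the PCID's TLB entries survive the switch. A sketch of the CR3 value being composed, leaving out the LAM bits that the real build_cr3() also folds in:

	#include <stdbool.h>
	#include <stdint.h>

	#define CR3_NOFLUSH	(1ULL << 63)	/* "keep this PCID's entries" hint */

	/*
	 * CR3 is the PGD's physical address with the PCID in the low bits;
	 * bit 63 set tells the CPU not to flush the PCID's cached
	 * translations (the need_flush == false case above).
	 */
	static uint64_t build_cr3_model(uint64_t pgd_pa, unsigned int pcid,
					bool need_flush)
	{
		uint64_t cr3 = pgd_pa | pcid;

		if (!need_flush)
			cr3 |= CR3_NOFLUSH;
		return cr3;
	}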
681 * - The ASID changed from what cpu_tlbstate thinks it is (most likely
685 * - The TLB contains junk in slots corresponding to inactive ASIDs.
687 * - The CPU went so far out to lunch that it may have missed a TLB
699 WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd)); in initialize_tlbstate_and_flush()
708 * long mode.) in initialize_tlbstate_and_flush()
714 write_cr3(build_cr3(mm->pgd, 0, 0)); in initialize_tlbstate_and_flush()
720 this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id); in initialize_tlbstate_and_flush()
740 * - mm_tlb_gen: the latest generation. in flush_tlb_func()
741 * - local_tlb_gen: the generation that this CPU has already caught in flush_tlb_func()
743 * - f->new_tlb_gen: the generation that the requester of the flush in flush_tlb_func()
750 bool local = smp_processor_id() == f->initiating_cpu; in flush_tlb_func()
763 if (f->mm && f->mm != loaded_mm) { in flush_tlb_func()
764 cpumask_clear_cpu(raw_smp_processor_id(), mm_cpumask(f->mm)); in flush_tlb_func()
773 loaded_mm->context.ctx_id); in flush_tlb_func()
777 * We're in lazy mode. We need to at least flush our in flush_tlb_func()
778 * paging-structure cache to avoid speculatively reading in flush_tlb_func()
783 * IPIs to lazy TLB mode CPUs. in flush_tlb_func()
789 if (unlikely(f->new_tlb_gen != TLB_GENERATION_INVALID && in flush_tlb_func()
790 f->new_tlb_gen <= local_tlb_gen)) { in flush_tlb_func()
792  * The TLB is already up to date with respect to f->new_tlb_gen. in flush_tlb_func()
804 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen); in flush_tlb_func()
809 * happen if two concurrent flushes happen -- the first flush to in flush_tlb_func()
817 WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen); in flush_tlb_func()
822 * possible that f->new_tlb_gen <= local_tlb_gen), but we're in flush_tlb_func()
831 * 1. f->new_tlb_gen == local_tlb_gen + 1. We have an invariant that in flush_tlb_func()
834 * f->new_tlb_gen == 3, then we know that the flush needed to bring in flush_tlb_func()
848 * 2. f->new_tlb_gen == mm_tlb_gen. This is purely an optimization. in flush_tlb_func()
856 if (f->end != TLB_FLUSH_ALL && in flush_tlb_func()
857 f->new_tlb_gen == local_tlb_gen + 1 && in flush_tlb_func()
858 f->new_tlb_gen == mm_tlb_gen) { in flush_tlb_func()
860 unsigned long addr = f->start; in flush_tlb_func()
863 VM_WARN_ON(f->new_tlb_gen == TLB_GENERATION_INVALID); in flush_tlb_func()
866 VM_WARN_ON(f->mm == NULL); in flush_tlb_func()
868 nr_invalidate = (f->end - f->start) >> f->stride_shift; in flush_tlb_func()
870 while (addr < f->end) { in flush_tlb_func()
872 addr += 1UL << f->stride_shift; in flush_tlb_func()
891 (f->mm == NULL) ? TLB_LOCAL_SHOOTDOWN : in flush_tlb_func()
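The three generation counters listed above reduce to a small decision: skip the flush if this CPU is already caught up, do a ranged flush only when the request is exactly the next generation needed and also the mm's latest, and otherwise flush everything. A compact model of that decision, with the per-CPU and atomic plumbing stripped away:

	#include <stdbool.h>
	#include <stdint.h>

	enum flush_kind { FLUSH_NONE, FLUSH_RANGE, FLUSH_ALL };

	/*
	 *   local_tlb_gen - what this CPU has already flushed up to
	 *   mm_tlb_gen    - the mm's latest generation
	 *   new_tlb_gen   - the generation this flush request was sent for
	 * is_ranged says whether the request covers a bounded address range.
	 */
	static enum flush_kind decide_flush(uint64_t local_tlb_gen,
					    uint64_t mm_tlb_gen,
					    uint64_t new_tlb_gen,
					    bool is_ranged)
	{
		/* Already up to date with respect to this request. */
		if (new_tlb_gen <= local_tlb_gen)
			return FLUSH_NONE;

		/*
		 * A partial flush suffices only if this request is exactly the
		 * next generation we need *and* nothing newer is pending.
		 */
		if (is_ranged &&
		    new_tlb_gen == local_tlb_gen + 1 &&
		    new_tlb_gen == mm_tlb_gen)
			return FLUSH_RANGE;

		/* Otherwise flush everything and jump straight to mm_tlb_gen. */
		return FLUSH_ALL;
	}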
900 /* Lazy TLB will get flushed at the next context switch. */ in should_flush_tlb()
905 if (!info->mm) in should_flush_tlb()
909 if (per_cpu(cpu_tlbstate.loaded_mm, cpu) == info->mm) in should_flush_tlb()
913 if (info->trim_cpumask) in should_flush_tlb()
921 if (time_after(jiffies, READ_ONCE(mm->context.next_trim_cpumask))) { in should_trim_cpumask()
922 WRITE_ONCE(mm->context.next_trim_cpumask, jiffies + HZ); in should_trim_cpumask()
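should_trim_cpumask() above rate-limits cpumask trimming to roughly once per second per mm: whoever first observes that the deadline has passed re-arms it with jiffies + HZ. A user-space model of the same gate, with wall-clock seconds standing in for jiffies:

	#include <stdbool.h>
	#include <time.h>

	static time_t next_trim_deadline;

	/*
	 * The first caller past the deadline wins and pushes it out by
	 * another second; everyone else skips the trim.
	 */
	static bool should_trim(void)
	{
		time_t now = time(NULL);

		if (now > next_trim_deadline) {
			next_trim_deadline = now + 1;	/* ~ jiffies + HZ */
			return true;
		}
		return false;
	}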
940 if (info->end == TLB_FLUSH_ALL) in native_flush_tlb_multi()
944 (info->end - info->start) >> PAGE_SHIFT); in native_flush_tlb_multi()
948 * CPUs in lazy TLB mode. They will flush the CPU themselves in native_flush_tlb_multi()
949 * at the next context switch. in native_flush_tlb_multi()
952 * IPI everywhere, to prevent CPUs in lazy TLB mode from tripping in native_flush_tlb_multi()
956 if (info->freed_tables) in native_flush_tlb_multi()
996 * Ensure that the following code is non-reentrant and flush_tlb_info in get_flush_tlb_info()
998 * interrupt handlers and machine-check exception handlers. in get_flush_tlb_info()
1003 info->start = start; in get_flush_tlb_info()
1004 info->end = end; in get_flush_tlb_info()
1005 info->mm = mm; in get_flush_tlb_info()
1006 info->stride_shift = stride_shift; in get_flush_tlb_info()
1007 info->freed_tables = freed_tables; in get_flush_tlb_info()
1008 info->new_tlb_gen = new_tlb_gen; in get_flush_tlb_info()
1009 info->initiating_cpu = smp_processor_id(); in get_flush_tlb_info()
1010 info->trim_cpumask = 0; in get_flush_tlb_info()
1036 ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) { in flush_tlb_mm_range()
1049 * a local TLB flush is needed. Optimize this use-case by calling in flush_tlb_mm_range()
1053 info->trim_cpumask = should_trim_cpumask(mm); in flush_tlb_mm_range()
1086 for (addr = f->start; addr < f->end; addr += PAGE_SIZE) in do_kernel_range_flush()
1094 (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) { in flush_tlb_kernel_range()
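Both the user-range and kernel-range paths above compare the number of pages to be flushed against tlb_single_page_flush_ceiling and fall back to a full flush when page-by-page invalidation would no longer pay off. A sketch of that heuristic; the 33-page default matches what the kernel has shipped with, and it is tunable through the debugfs write handler at the bottom of this listing:

	#include <stdbool.h>

	#define TLB_FLUSH_ALL	(~0UL)

	static unsigned long tlb_single_page_flush_ceiling = 33;

	/*
	 * Range-vs-full heuristic: once the span covers more than the
	 * ceiling's worth of pages (at the given stride), one full TLB
	 * flush is cheaper than invalidating page by page.
	 */
	static bool want_full_flush(unsigned long start, unsigned long end,
				    unsigned int stride_shift)
	{
		if (end == TLB_FLUSH_ALL)
			return true;
		return ((end - start) >> stride_shift) >
		       tlb_single_page_flush_ceiling;
	}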
1120 build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd, in __get_current_cr3_fast()
1145 * If PTI is on, then the kernel is mapped with non-global PTEs, and in flush_tlb_one_kernel()
1217 * Read-modify-write to CR4 - protect it from preemption and in native_flush_tlb_global()
1242 /* If current->mm == NULL then the read_cr3() "borrows" an mm */ in native_flush_tlb_local()
1266 * !PGE -> !PCID (setup_pcid()), thus every flush is total. in __flush_tlb_all()
1283 * a local TLB flush is needed. Optimize this use-case by calling in arch_tlbbatch_flush()
1286 if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) { in arch_tlbbatch_flush()
1287 flush_tlb_multi(&batch->cpumask, info); in arch_tlbbatch_flush()
1288 } else if (cpumask_test_cpu(cpu, &batch->cpumask)) { in arch_tlbbatch_flush()
1295 cpumask_clear(&batch->cpumask); in arch_tlbbatch_flush()
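flush_tlb_mm_range() and arch_tlbbatch_flush() share the optimization quoted above: send IPIs only when some CPU other than the current one may hold stale entries, and handle the purely local case with a local flush. A small model of that choice, with the cpumask as a plain boolean array:

	#include <stdbool.h>

	enum flush_path { FLUSH_NOTHING, FLUSH_LOCAL_ONLY, FLUSH_MULTI };

	/*
	 * "Any CPU but me?" check: cross-CPU flushes (and their IPIs) are
	 * only needed when another CPU has the mm loaded.
	 */
	static enum flush_path pick_flush_path(const bool *cpumask, int nr_cpus,
					       int this_cpu)
	{
		for (int cpu = 0; cpu < nr_cpus; cpu++)
			if (cpu != this_cpu && cpumask[cpu])
				return FLUSH_MULTI;

		return cpumask[this_cpu] ? FLUSH_LOCAL_ONLY : FLUSH_NOTHING;
	}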
1311 struct mm_struct *current_mm = current->mm; in nmi_uaccess_okay()
1317 * current_mm->pgd == __va(read_cr3_pa()). This may be slow, though, in nmi_uaccess_okay()
1328 VM_WARN_ON_ONCE(current_mm->pgd != __va(read_cr3_pa())); in nmi_uaccess_okay()
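nmi_uaccess_okay() above only allows user accesses from NMI context when the mm the kernel considers current is the one this CPU actually has loaded, with a paranoid check that CR3 points at that mm's page tables. A toy model of those checks, using plain values in place of the real mm_struct and CR3 reads:

	#include <stdbool.h>
	#include <stdint.h>

	struct cpu_tlb_model {
		const void *loaded_mm;	/* what this CPU last switched to */
		uint64_t cr3_pgd_pa;	/* physical PGD currently in CR3  */
	};

	struct mm_model {
		uint64_t pgd_pa;
	};

	static bool nmi_uaccess_okay_model(const struct cpu_tlb_model *cpu,
					   const struct mm_model *current_mm)
	{
		if (!current_mm)
			return false;
		if (cpu->loaded_mm != (const void *)current_mm)
			return false;
		/* Mirrors the VM_WARN_ON_ONCE() consistency check above. */
		return cpu->cr3_pgd_pa == current_mm->pgd_pa;
	}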
1350 len = min(count, sizeof(buf) - 1); in tlbflush_write_file()
1352 return -EFAULT; in tlbflush_write_file()
1356 return -EINVAL; in tlbflush_write_file()
1359 return -EINVAL; in tlbflush_write_file()