// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * SMP support for PowerNV machines.
 *
 * Copyright 2011 IBM Corp.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/sched/hotplug.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/cpu.h>

#include <asm/irq.h>
#include <asm/smp.h>
#include <asm/paca.h>
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/firmware.h>
#include <asm/vdso_datapage.h>
#include <asm/cputhreads.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/opal.h>
#include <asm/runlatch.h>
#include <asm/text-patching.h>
#include <asm/dbell.h>
#include <asm/kvm_ppc.h>
#include <asm/ppc-opcode.h>
#include <asm/cpuidle.h>
#include <asm/kexec.h>
#include <asm/reg.h>
#include <asm/powernv.h>
#include <asm/systemcfg.h>

#include "powernv.h"

#ifdef DEBUG
#include <asm/udbg.h>
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...) do { } while (0)
#endif

static void pnv_smp_setup_cpu(int cpu)
{
        /*
         * P9 workaround for CI vector load (see traps.c),
         * enable the corresponding HMI interrupt
         */
        if (pvr_version_is(PVR_POWER9))
                mtspr(SPRN_HMEER, mfspr(SPRN_HMEER) | PPC_BIT(17));

        if (xive_enabled())
                xive_smp_setup_cpu();
        else if (cpu != boot_cpuid)
                xics_setup_cpu();
}

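/*
 * Start secondary CPU "nr": if it is still held in OPAL, ask OPAL to
 * release it into generic_secondary_smp_init(); if it is already
 * spinning (e.g. after kexec), just kick it via the PACA.
 */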
static int pnv_smp_kick_cpu(int nr)
{
        unsigned int pcpu;
        unsigned long start_here =
                        __pa(ppc_function_entry(generic_secondary_smp_init));
        long rc;
        uint8_t status;

        if (nr < 0 || nr >= nr_cpu_ids)
                return -EINVAL;

        pcpu = get_hard_smp_processor_id(nr);
        /*
         * If the CPU has already started, or OPAL is not supported,
         * just kick the CPU via the PACA.
         */
        if (paca_ptrs[nr]->cpu_start || !firmware_has_feature(FW_FEATURE_OPAL))
                goto kick;

        /*
         * At this point, the CPU can either be spinning on the way in
         * from kexec or be inside OPAL waiting to be started for the
         * first time. OPAL v3 lets us query whether it still owns the
         * CPU, so do that.
         */
        rc = opal_query_cpu_status(pcpu, &status);
        if (rc != OPAL_SUCCESS) {
                pr_warn("OPAL Error %ld querying CPU %d state\n", rc, nr);
                return -ENODEV;
        }

        /*
         * Already started, just kick it, probably coming from
         * kexec and spinning.
         */
        if (status == OPAL_THREAD_STARTED)
                goto kick;

        /*
         * Available/inactive, let's kick it.
         */
        if (status == OPAL_THREAD_INACTIVE) {
                pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n", nr, pcpu);
                rc = opal_start_cpu(pcpu, start_here);
                if (rc != OPAL_SUCCESS) {
                        pr_warn("OPAL Error %ld starting CPU %d\n", rc, nr);
                        return -ENODEV;
                }
        } else {
                /*
                 * An unavailable CPU (or any other unknown status)
                 * shouldn't be started. It should also not be in the
                 * possible map, but currently that can happen.
                 */
                pr_devel("OPAL: CPU %d (HW 0x%x) is unavailable"
                         " (status %d)...\n", nr, pcpu, status);
                return -ENODEV;
        }

kick:
        return smp_generic_kick_cpu(nr);
}

#ifdef CONFIG_HOTPLUG_CPU

static int pnv_smp_cpu_disable(void)
{
        int cpu = smp_processor_id();

        /* This is identical to pSeries... might consolidate by
         * moving migrate_irqs_away to a ppc_md with default to
         * the generic fixup_irqs. --BenH.
         */
        set_cpu_online(cpu, false);
#ifdef CONFIG_PPC64_PROC_SYSTEMCFG
        systemcfg->processorCount--;
#endif
        if (cpu == boot_cpuid)
                boot_cpuid = cpumask_any(cpu_online_mask);
        if (xive_enabled())
                xive_smp_disable_cpu();
        else
                xics_migrate_irqs_away();

        cleanup_cpu_mmu_context();

        return 0;
}

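/*
 * Ack and EOI whatever interrupt is left pending in the presentation
 * controller: XIVE or the OPAL XICS emulation on POWER9 and later,
 * the native XICS ICP on earlier processors.
 */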
static void pnv_flush_interrupts(void)
{
        if (cpu_has_feature(CPU_FTR_ARCH_300)) {
                if (xive_enabled())
                        xive_flush_interrupt();
                else
                        icp_opal_flush_interrupt();
        } else {
                icp_native_flush_interrupt();
        }
}

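/*
 * Runs on a CPU that is going offline: park it in the deepest
 * available idle state and spin there until it is asked to come
 * back online.
 */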
static void pnv_cpu_offline_self(void)
{
        unsigned long srr1, unexpected_mask, wmask;
        unsigned int cpu;
        u64 lpcr_val;

        /* Standard hot unplug procedure */

        idle_task_exit();
        cpu = smp_processor_id();
        DBG("CPU%d offline\n", cpu);
        generic_set_cpu_dead(cpu);
        smp_wmb();

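        /*
         * POWER8 and later encode additional wake reasons (such as
         * hypervisor doorbells) in SRR1, so they need the wider mask.
         */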
        wmask = SRR1_WAKEMASK;
        if (cpu_has_feature(CPU_FTR_ARCH_207S))
                wmask = SRR1_WAKEMASK_P8;

        /*
         * This turns the irq soft-disabled state we're called with, into a
         * hard-disabled state with pending irq_happened interrupts cleared.
         *
         * PACA_IRQ_DEC   - Decrementer should be ignored.
         * PACA_IRQ_HMI   - Can be ignored, processing is done in real mode.
         * PACA_IRQ_DBELL, EE, PMI - Unexpected.
         */
        hard_irq_disable();
        if (generic_check_cpu_restart(cpu))
                goto out;

        unexpected_mask = ~(PACA_IRQ_DEC | PACA_IRQ_HMI | PACA_IRQ_HARD_DIS);
        if (local_paca->irq_happened & unexpected_mask) {
                if (local_paca->irq_happened & PACA_IRQ_EE)
                        pnv_flush_interrupts();
                DBG("CPU%d Unexpected exit while offline irq_happened=%lx!\n",
                                cpu, local_paca->irq_happened);
        }
        local_paca->irq_happened = PACA_IRQ_HARD_DIS;

        /*
         * We don't want to take decrementer interrupts while we are
         * offline, so clear LPCR:PECE1. We keep PECE2 (and
         * LPCR_PECE_HVEE on P9) enabled so as to let IPIs in.
         *
         * If the CPU gets woken up by a special wakeup, ensure that
         * the SLW engine sets LPCR with decrementer bit cleared, else
         * the CPU will come back to the kernel due to a spurious
         * wakeup.
         */
        lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
        pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);

        while (!generic_check_cpu_restart(cpu)) {
                /*
                 * Clear IPI flag, since we don't handle IPIs while
                 * offline, except for those when changing micro-threading
                 * mode, which are handled explicitly below, and those
                 * for coming online, which are handled via
                 * generic_check_cpu_restart() calls.
                 */
                kvmppc_clear_host_ipi(cpu);

                srr1 = pnv_cpu_offline(cpu);

                WARN_ON_ONCE(!irqs_disabled());
                WARN_ON(lazy_irq_pending());

                /*
                 * If the SRR1 value indicates that we woke up due to
                 * an external interrupt, then clear the interrupt.
                 * We clear the interrupt before checking for the
                 * reason, so as to avoid a race where we wake up for
                 * some other reason, find nothing and clear the interrupt
                 * just as some other cpu is sending us an interrupt.
                 * If we returned from power7_nap as a result of
                 * having finished executing in a KVM guest, then srr1
                 * contains 0.
                 */
                if (((srr1 & wmask) == SRR1_WAKEEE) ||
                    ((srr1 & wmask) == SRR1_WAKEHVI)) {
                        pnv_flush_interrupts();
                } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
                        unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
                        asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
                } else if ((srr1 & wmask) == SRR1_WAKERESET) {
                        irq_set_pending_from_srr1(srr1);
                        /* Does not return */
                }

                smp_mb();

                /*
                 * For kdump kernels, process the IPI and jump to
                 * crash_ipi_callback.
                 */
                if (kdump_in_progress()) {
                        /*
                         * If we got to this point, we've not used
                         * NMIs, otherwise we would have gone
                         * via the SRR1_WAKERESET path. We are
                         * using regular IPIs for waking up offline
                         * threads.
                         */
                        struct pt_regs regs;

                        ppc_save_regs(&regs);
                        crash_ipi_callback(&regs);
                        /* Does not return */
                }

                if (cpu_core_split_required())
                        continue;

                if (srr1 && !generic_check_cpu_restart(cpu))
                        DBG("CPU%d Unexpected exit while offline srr1=%lx!\n",
                                        cpu, srr1);
        }

        /*
         * Re-enable decrementer interrupts in LPCR.
         *
         * Further, we want stop states to be woken up by decrementer
         * for non-hotplug cases. So program the LPCR via stop api as
         * well.
         */
        lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1;
        pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
out:
        DBG("CPU%d coming online...\n", cpu);
}

#endif /* CONFIG_HOTPLUG_CPU */

static int pnv_cpu_bootable(unsigned int nr)
{
        /*
         * Starting with POWER8, the subcore logic relies on all threads of a
         * core being booted so that they can participate in split mode
         * switches. So on those machines we ignore the smt_enabled_at_boot
         * setting (smt-enabled on the kernel command line).
         */
        if (cpu_has_feature(CPU_FTR_ARCH_207S))
                return 1;

        return smp_generic_cpu_bootable(nr);
}

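/*
 * Give the interrupt controller a chance to set up its per-CPU state
 * before the CPU is kicked; only XIVE needs this.
 */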
static int pnv_smp_prepare_cpu(int cpu)
{
        if (xive_enabled())
                return xive_smp_prepare_cpu(cpu);
        return 0;
}

/* Cause IPI as setup by the interrupt controller (xics or xive) */
static void (*ic_cause_ipi)(int cpu);

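/*
 * Send an IPI, preferring a cheap core-local doorbell and falling
 * back to the interrupt controller when the target is in another core.
 */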
static void pnv_cause_ipi(int cpu)
{
        if (doorbell_try_core_ipi(cpu))
                return;

        ic_cause_ipi(cpu);
}

static void __init pnv_smp_probe(void)
{
        if (xive_enabled())
                xive_smp_probe();
        else
                xics_smp_probe();

        if (cpu_has_feature(CPU_FTR_DBELL)) {
                ic_cause_ipi = smp_ops->cause_ipi;
                WARN_ON(!ic_cause_ipi);

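                /*
                 * POWER9 (ARCH_300) doorbells can reach any CPU, so
                 * they replace the controller IPI outright; POWER8
                 * doorbells only reach threads of the same core, so
                 * keep the controller as a fallback via pnv_cause_ipi().
                 */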
                if (cpu_has_feature(CPU_FTR_ARCH_300))
                        smp_ops->cause_ipi = doorbell_global_ipi;
                else
                        smp_ops->cause_ipi = pnv_cause_ipi;
        }
}

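/*
 * When OPAL can signal a system reset, NMI IPIs arrive here as system
 * reset exceptions.
 */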
noinstr static int pnv_system_reset_exception(struct pt_regs *regs)
{
        if (smp_handle_nmi_ipi(regs))
                return 1;
        return 0;
}

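/*
 * Send an NMI IPI via an OPAL system reset. Quiesce OPAL around the
 * reset (when the firmware supports it) so the target is not struck
 * while it is executing inside OPAL.
 */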
static int pnv_cause_nmi_ipi(int cpu)
{
        int64_t rc;

        if (cpu >= 0) {
                int h = get_hard_smp_processor_id(cpu);

                if (opal_check_token(OPAL_QUIESCE))
                        opal_quiesce(QUIESCE_HOLD, h);

                rc = opal_signal_system_reset(h);

                if (opal_check_token(OPAL_QUIESCE))
                        opal_quiesce(QUIESCE_RESUME, h);

                if (rc != OPAL_SUCCESS)
                        return 0;
                return 1;

        } else if (cpu == NMI_IPI_ALL_OTHERS) {
                bool success = true;
                int c;

                if (opal_check_token(OPAL_QUIESCE))
                        opal_quiesce(QUIESCE_HOLD, -1);

                /*
                 * We do not use broadcasts (yet), because it's not clear
                 * exactly what semantics Linux wants or the firmware should
                 * provide.
                 */
                for_each_online_cpu(c) {
                        if (c == smp_processor_id())
                                continue;

                        rc = opal_signal_system_reset(
                                        get_hard_smp_processor_id(c));
                        if (rc != OPAL_SUCCESS)
                                success = false;
                }

                if (opal_check_token(OPAL_QUIESCE))
                        opal_quiesce(QUIESCE_RESUME, -1);

                if (success)
                        return 1;

                /*
                 * Caller will fall back to doorbells, which may pick
                 * up the remainders.
                 */
        }

        return 0;
}

static struct smp_ops_t pnv_smp_ops = {
        .message_pass   = NULL, /* Use smp_muxed_ipi_message_pass */
        .cause_ipi      = NULL, /* Filled at runtime by pnv_smp_probe() */
        .cause_nmi_ipi  = NULL,
        .probe          = pnv_smp_probe,
        .prepare_cpu    = pnv_smp_prepare_cpu,
        .kick_cpu       = pnv_smp_kick_cpu,
        .setup_cpu      = pnv_smp_setup_cpu,
        .cpu_bootable   = pnv_cpu_bootable,
#ifdef CONFIG_HOTPLUG_CPU
        .cpu_disable    = pnv_smp_cpu_disable,
        .cpu_die        = generic_cpu_die,
        .cpu_offline_self = pnv_cpu_offline_self,
#endif /* CONFIG_HOTPLUG_CPU */
};

/* This is called very early during platform setup_arch */
void __init pnv_smp_init(void)
{
        if (opal_check_token(OPAL_SIGNAL_SYSTEM_RESET)) {
                ppc_md.system_reset_exception = pnv_system_reset_exception;
                pnv_smp_ops.cause_nmi_ipi = pnv_cause_nmi_ipi;
        }
        smp_ops = &pnv_smp_ops;

#ifdef CONFIG_HOTPLUG_CPU
#ifdef CONFIG_CRASH_DUMP
        crash_wake_offline = 1;
#endif
#endif
}