// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2019 Western Digital Corporation or its affiliates.
 *
 * Authors:
 *     Anup Patel <[email protected]>
 */

#include <linux/bitops.h>
#include <linux/entry-kvm.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kdebug.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/vmalloc.h>
#include <linux/sched/signal.h>
#include <linux/fs.h>
#include <linux/kvm_host.h>
#include <asm/cacheflush.h>
#include <asm/kvm_nacl.h>
#include <asm/kvm_vcpu_vector.h>

#define CREATE_TRACE_POINTS
#include "trace.h"

const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, ecall_exit_stat),
	STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
	STATS_DESC_COUNTER(VCPU, wrs_exit_stat),
	STATS_DESC_COUNTER(VCPU, mmio_exit_user),
	STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
	STATS_DESC_COUNTER(VCPU, csr_exit_user),
	STATS_DESC_COUNTER(VCPU, csr_exit_kernel),
	STATS_DESC_COUNTER(VCPU, signal_exits),
	STATS_DESC_COUNTER(VCPU, exits),
	STATS_DESC_COUNTER(VCPU, instr_illegal_exits),
	STATS_DESC_COUNTER(VCPU, load_misaligned_exits),
	STATS_DESC_COUNTER(VCPU, store_misaligned_exits),
	STATS_DESC_COUNTER(VCPU, load_access_exits),
	STATS_DESC_COUNTER(VCPU, store_access_exits),
};

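/*
 * Header for the binary stats interface exposed through the per-vCPU stats
 * file descriptor: the id string, the descriptors above, and the counter
 * data live at the offsets recorded below.
 */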
const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};

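/*
 * Restore the vCPU to its reset state. Called on vCPU creation and when a
 * KVM_REQ_VCPU_RESET request is handled in kvm_riscv_check_vcpu_requests().
 */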
static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
	struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
	struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
	struct kvm_cpu_context *reset_cntx = &vcpu->arch.guest_reset_context;
	bool loaded;

	/*
	 * Preemption must be disabled here because this races with
	 * kvm_sched_out()/kvm_sched_in() (called from preempt notifiers),
	 * which also call vcpu_put()/vcpu_load().
	 */
	get_cpu();
	loaded = (vcpu->cpu != -1);
	if (loaded)
		kvm_arch_vcpu_put(vcpu);

	vcpu->arch.last_exit_cpu = -1;

	memcpy(csr, reset_csr, sizeof(*csr));

	spin_lock(&vcpu->arch.reset_cntx_lock);
	memcpy(cntx, reset_cntx, sizeof(*cntx));
	spin_unlock(&vcpu->arch.reset_cntx_lock);

	kvm_riscv_vcpu_fp_reset(vcpu);

	kvm_riscv_vcpu_vector_reset(vcpu);

	kvm_riscv_vcpu_timer_reset(vcpu);

	kvm_riscv_vcpu_aia_reset(vcpu);

	bitmap_zero(vcpu->arch.irqs_pending, KVM_RISCV_VCPU_NR_IRQS);
	bitmap_zero(vcpu->arch.irqs_pending_mask, KVM_RISCV_VCPU_NR_IRQS);

	kvm_riscv_vcpu_pmu_reset(vcpu);

	vcpu->arch.hfence_head = 0;
	vcpu->arch.hfence_tail = 0;
	memset(vcpu->arch.hfence_queue, 0, sizeof(vcpu->arch.hfence_queue));

	kvm_riscv_vcpu_sbi_sta_reset(vcpu);

	/* Reset the guest CSRs for the hotplug usecase */
	if (loaded)
		kvm_arch_vcpu_load(vcpu, smp_processor_id());
	put_cpu();
}

int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
{
	return 0;
}

int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
	int rc;
	struct kvm_cpu_context *cntx;
	struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;

	spin_lock_init(&vcpu->arch.mp_state_lock);

	/* Mark this VCPU never ran */
	vcpu->arch.ran_atleast_once = false;
	vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
	bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX);

	/* Setup ISA features available to VCPU */
	kvm_riscv_vcpu_setup_isa(vcpu);

	/* Setup vendor, arch, and implementation details */
	vcpu->arch.mvendorid = sbi_get_mvendorid();
	vcpu->arch.marchid = sbi_get_marchid();
	vcpu->arch.mimpid = sbi_get_mimpid();

	/* Setup VCPU hfence queue */
	spin_lock_init(&vcpu->arch.hfence_lock);

	/* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
	spin_lock_init(&vcpu->arch.reset_cntx_lock);

	spin_lock(&vcpu->arch.reset_cntx_lock);
	cntx = &vcpu->arch.guest_reset_context;
	cntx->sstatus = SR_SPP | SR_SPIE;
	cntx->hstatus = 0;
	cntx->hstatus |= HSTATUS_VTW;
	cntx->hstatus |= HSTATUS_SPVP;
	cntx->hstatus |= HSTATUS_SPV;
	spin_unlock(&vcpu->arch.reset_cntx_lock);

	if (kvm_riscv_vcpu_alloc_vector_context(vcpu, cntx))
		return -ENOMEM;

	/* By default, make CY, TM, and IR counters accessible in VU mode */
	reset_csr->scounteren = 0x7;

	/* Setup VCPU timer */
	kvm_riscv_vcpu_timer_init(vcpu);

	/* Setup performance monitoring */
	kvm_riscv_vcpu_pmu_init(vcpu);

	/* Setup VCPU AIA */
	rc = kvm_riscv_vcpu_aia_init(vcpu);
	if (rc)
		return rc;

	/*
	 * Setup SBI extensions
	 * NOTE: This must be the last thing to be initialized.
	 */
	kvm_riscv_vcpu_sbi_init(vcpu);

	/* Reset VCPU */
	kvm_riscv_reset_vcpu(vcpu);

	return 0;
}

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	/*
	 * The vCPU with id 0 is the designated boot CPU.
	 * Keep all vCPUs with a non-zero id in the power-off state so that
	 * they can be brought up using the SBI HSM extension.
	 */
	if (vcpu->vcpu_idx != 0)
		kvm_riscv_vcpu_power_off(vcpu);
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	/* Cleanup VCPU AIA context */
	kvm_riscv_vcpu_aia_deinit(vcpu);

	/* Cleanup VCPU timer */
	kvm_riscv_vcpu_timer_deinit(vcpu);

	kvm_riscv_vcpu_pmu_deinit(vcpu);

	/* Free unused pages pre-allocated for G-stage page table mappings */
	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);

	/* Free vector context space for host and guest kernel */
	kvm_riscv_vcpu_free_vector_context(vcpu);
}

int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
	return kvm_riscv_vcpu_timer_pending(vcpu);
}

void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
	kvm_riscv_aia_wakeon_hgei(vcpu, true);
}

void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
	kvm_riscv_aia_wakeon_hgei(vcpu, false);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) &&
		!kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause);
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false;
}

#ifdef CONFIG_GUEST_PERF_EVENTS
unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.guest_context.sepc;
}
#endif

vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}

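/*
 * Fast-path vCPU ioctls that may be issued without taking the vCPU mutex.
 * Only KVM_INTERRUPT is handled here; it asserts or de-asserts the VS-mode
 * external interrupt line. Roughly, user space drives it like this
 * (illustrative sketch, not part of this file):
 *
 *	struct kvm_interrupt irq = { .irq = KVM_INTERRUPT_SET };
 *	ioctl(vcpu_fd, KVM_INTERRUPT, &irq);	// raise IRQ_VS_EXT
 *	irq.irq = KVM_INTERRUPT_UNSET;
 *	ioctl(vcpu_fd, KVM_INTERRUPT, &irq);	// clear IRQ_VS_EXT
 */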
long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	if (ioctl == KVM_INTERRUPT) {
		struct kvm_interrupt irq;

		if (copy_from_user(&irq, argp, sizeof(irq)))
			return -EFAULT;

		if (irq.irq == KVM_INTERRUPT_SET)
			return kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_VS_EXT);
		else
			return kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
	}

	return -ENOIOCTLCMD;
}

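/*
 * Regular vCPU ioctls: KVM_GET_ONE_REG/KVM_SET_ONE_REG and KVM_GET_REG_LIST.
 * KVM_GET_REG_LIST follows the usual two-call protocol; roughly (illustrative
 * sketch, not part of this file):
 *
 *	struct kvm_reg_list hdr = { .n = 0 }, *list;
 *
 *	ioctl(vcpu_fd, KVM_GET_REG_LIST, &hdr);		// fails with E2BIG,
 *							// hdr.n = actual count
 *	list = malloc(sizeof(*list) + hdr.n * sizeof(__u64));
 *	list->n = hdr.n;
 *	ioctl(vcpu_fd, KVM_GET_REG_LIST, list);		// fills list->reg[]
 */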
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	long r = -EINVAL;

	switch (ioctl) {
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;

		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_riscv_vcpu_set_reg(vcpu, &reg);
		else
			r = kvm_riscv_vcpu_get_reg(vcpu, &reg);
		break;
	}
	case KVM_GET_REG_LIST: {
		struct kvm_reg_list __user *user_list = argp;
		struct kvm_reg_list reg_list;
		unsigned int n;

		r = -EFAULT;
		if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
			break;
		n = reg_list.n;
		reg_list.n = kvm_riscv_vcpu_num_regs(vcpu);
		if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
			break;
		r = -E2BIG;
		if (n < reg_list.n)
			break;
		r = kvm_riscv_vcpu_copy_reg_indices(vcpu, user_list->reg);
		break;
	}
	default:
		break;
	}

	return r;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	return -EINVAL;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	return -EINVAL;
}

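/*
 * Fold any software-pending interrupt updates into the shadow HVIP value so
 * that they are injected into the guest on the next VCPU entry.
 */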
void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
	unsigned long mask, val;

	if (READ_ONCE(vcpu->arch.irqs_pending_mask[0])) {
		mask = xchg_acquire(&vcpu->arch.irqs_pending_mask[0], 0);
		val = READ_ONCE(vcpu->arch.irqs_pending[0]) & mask;

		csr->hvip &= ~mask;
		csr->hvip |= val;
	}

	/* Flush AIA high interrupts */
	kvm_riscv_vcpu_aia_flush_interrupts(vcpu);
}

void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
{
	unsigned long hvip;
	struct kvm_vcpu_arch *v = &vcpu->arch;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	/* Read current HVIP and VSIE CSRs */
	csr->vsie = ncsr_read(CSR_VSIE);

	/* Sync up the HVIP.VSSIP bit changes done by the guest */
	hvip = ncsr_read(CSR_HVIP);
	if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) {
		if (hvip & (1UL << IRQ_VS_SOFT)) {
			if (!test_and_set_bit(IRQ_VS_SOFT,
					      v->irqs_pending_mask))
				set_bit(IRQ_VS_SOFT, v->irqs_pending);
		} else {
			if (!test_and_set_bit(IRQ_VS_SOFT,
					      v->irqs_pending_mask))
				clear_bit(IRQ_VS_SOFT, v->irqs_pending);
		}
	}

	/* Sync up the HVIP.LCOFIP bit changes (only clear) by the guest */
	if ((csr->hvip ^ hvip) & (1UL << IRQ_PMU_OVF)) {
		if (!(hvip & (1UL << IRQ_PMU_OVF)) &&
		    !test_and_set_bit(IRQ_PMU_OVF, v->irqs_pending_mask))
			clear_bit(IRQ_PMU_OVF, v->irqs_pending);
	}

	/* Sync up AIA high interrupts */
	kvm_riscv_vcpu_aia_sync_interrupts(vcpu);

	/* Sync up timer CSRs */
	kvm_riscv_vcpu_timer_sync(vcpu);
}

int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
{
	/*
	 * We only allow VS-mode software, timer, counter overflow, and
	 * external interrupts when irq is one of the local interrupts
	 * defined by the RISC-V privileged specification.
	 */
	if (irq < IRQ_LOCAL_MAX &&
	    irq != IRQ_VS_SOFT &&
	    irq != IRQ_VS_TIMER &&
	    irq != IRQ_VS_EXT &&
	    irq != IRQ_PMU_OVF)
		return -EINVAL;

	set_bit(irq, vcpu->arch.irqs_pending);
	smp_mb__before_atomic();
	set_bit(irq, vcpu->arch.irqs_pending_mask);

	kvm_vcpu_kick(vcpu);

	return 0;
}

int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
{
	/*
	 * We only allow VS-mode software, timer, counter overflow, and
	 * external interrupts when irq is one of the local interrupts
	 * defined by the RISC-V privileged specification.
	 */
	if (irq < IRQ_LOCAL_MAX &&
	    irq != IRQ_VS_SOFT &&
	    irq != IRQ_VS_TIMER &&
	    irq != IRQ_VS_EXT &&
	    irq != IRQ_PMU_OVF)
		return -EINVAL;

	clear_bit(irq, vcpu->arch.irqs_pending);
	smp_mb__before_atomic();
	set_bit(irq, vcpu->arch.irqs_pending_mask);

	return 0;
}

bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
{
	unsigned long ie;

	ie = ((vcpu->arch.guest_csr.vsie & VSIP_VALID_MASK)
		<< VSIP_TO_HVIP_SHIFT) & (unsigned long)mask;
	ie |= vcpu->arch.guest_csr.vsie & ~IRQ_LOCAL_MASK &
		(unsigned long)mask;
	if (READ_ONCE(vcpu->arch.irqs_pending[0]) & ie)
		return true;

	/* Check AIA high interrupts */
	return kvm_riscv_vcpu_aia_has_interrupts(vcpu, mask);
}

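/* Callers of the __ variants below must hold vcpu->arch.mp_state_lock. */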
void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
{
	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
	kvm_make_request(KVM_REQ_SLEEP, vcpu);
	kvm_vcpu_kick(vcpu);
}

void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
{
	spin_lock(&vcpu->arch.mp_state_lock);
	__kvm_riscv_vcpu_power_off(vcpu);
	spin_unlock(&vcpu->arch.mp_state_lock);
}

void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
{
	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
	kvm_vcpu_wake_up(vcpu);
}

void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
{
	spin_lock(&vcpu->arch.mp_state_lock);
	__kvm_riscv_vcpu_power_on(vcpu);
	spin_unlock(&vcpu->arch.mp_state_lock);
}

bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu)
{
	return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED;
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	*mp_state = READ_ONCE(vcpu->arch.mp_state);

	return 0;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int ret = 0;

	spin_lock(&vcpu->arch.mp_state_lock);

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_RUNNABLE:
		WRITE_ONCE(vcpu->arch.mp_state, *mp_state);
		break;
	case KVM_MP_STATE_STOPPED:
		__kvm_riscv_vcpu_power_off(vcpu);
		break;
	default:
		ret = -EINVAL;
	}

	spin_unlock(&vcpu->arch.mp_state_lock);

	return ret;
}

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		vcpu->arch.cfg.hedeleg &= ~BIT(EXC_BREAKPOINT);
	} else {
		vcpu->guest_debug = 0;
		vcpu->arch.cfg.hedeleg |= BIT(EXC_BREAKPOINT);
	}

	return 0;
}

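/*
 * Derive the static CSR configuration (henvcfg, hstateen0, hedeleg) from the
 * ISA extensions enabled for this vCPU. Called once, before the first run.
 */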
static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu)
{
	const unsigned long *isa = vcpu->arch.isa;
	struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;

	if (riscv_isa_extension_available(isa, SVPBMT))
		cfg->henvcfg |= ENVCFG_PBMTE;

	if (riscv_isa_extension_available(isa, SSTC))
		cfg->henvcfg |= ENVCFG_STCE;

	if (riscv_isa_extension_available(isa, ZICBOM))
		cfg->henvcfg |= (ENVCFG_CBIE | ENVCFG_CBCFE);

	if (riscv_isa_extension_available(isa, ZICBOZ))
		cfg->henvcfg |= ENVCFG_CBZE;

	if (riscv_isa_extension_available(isa, SVADU) &&
	    !riscv_isa_extension_available(isa, SVADE))
		cfg->henvcfg |= ENVCFG_ADUE;

	if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
		cfg->hstateen0 |= SMSTATEEN0_HSENVCFG;
		if (riscv_isa_extension_available(isa, SSAIA))
			cfg->hstateen0 |= SMSTATEEN0_AIA_IMSIC |
					  SMSTATEEN0_AIA |
					  SMSTATEEN0_AIA_ISEL;
		if (riscv_isa_extension_available(isa, SMSTATEEN))
			cfg->hstateen0 |= SMSTATEEN0_SSTATEEN0;
	}

	cfg->hedeleg = KVM_HEDELEG_DEFAULT;
	if (vcpu->guest_debug)
		cfg->hedeleg &= ~BIT(EXC_BREAKPOINT);
}

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	void *nsh;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
	struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;

	if (kvm_riscv_nacl_sync_csr_available()) {
		nsh = nacl_shmem();
		nacl_csr_write(nsh, CSR_VSSTATUS, csr->vsstatus);
		nacl_csr_write(nsh, CSR_VSIE, csr->vsie);
		nacl_csr_write(nsh, CSR_VSTVEC, csr->vstvec);
		nacl_csr_write(nsh, CSR_VSSCRATCH, csr->vsscratch);
		nacl_csr_write(nsh, CSR_VSEPC, csr->vsepc);
		nacl_csr_write(nsh, CSR_VSCAUSE, csr->vscause);
		nacl_csr_write(nsh, CSR_VSTVAL, csr->vstval);
		nacl_csr_write(nsh, CSR_HEDELEG, cfg->hedeleg);
		nacl_csr_write(nsh, CSR_HVIP, csr->hvip);
		nacl_csr_write(nsh, CSR_VSATP, csr->vsatp);
		nacl_csr_write(nsh, CSR_HENVCFG, cfg->henvcfg);
		if (IS_ENABLED(CONFIG_32BIT))
			nacl_csr_write(nsh, CSR_HENVCFGH, cfg->henvcfg >> 32);
		if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
			nacl_csr_write(nsh, CSR_HSTATEEN0, cfg->hstateen0);
			if (IS_ENABLED(CONFIG_32BIT))
				nacl_csr_write(nsh, CSR_HSTATEEN0H, cfg->hstateen0 >> 32);
		}
	} else {
		csr_write(CSR_VSSTATUS, csr->vsstatus);
		csr_write(CSR_VSIE, csr->vsie);
		csr_write(CSR_VSTVEC, csr->vstvec);
		csr_write(CSR_VSSCRATCH, csr->vsscratch);
		csr_write(CSR_VSEPC, csr->vsepc);
		csr_write(CSR_VSCAUSE, csr->vscause);
		csr_write(CSR_VSTVAL, csr->vstval);
		csr_write(CSR_HEDELEG, cfg->hedeleg);
		csr_write(CSR_HVIP, csr->hvip);
		csr_write(CSR_VSATP, csr->vsatp);
		csr_write(CSR_HENVCFG, cfg->henvcfg);
		if (IS_ENABLED(CONFIG_32BIT))
			csr_write(CSR_HENVCFGH, cfg->henvcfg >> 32);
		if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
			csr_write(CSR_HSTATEEN0, cfg->hstateen0);
			if (IS_ENABLED(CONFIG_32BIT))
				csr_write(CSR_HSTATEEN0H, cfg->hstateen0 >> 32);
		}
	}

	kvm_riscv_gstage_update_hgatp(vcpu);

	kvm_riscv_vcpu_timer_restore(vcpu);

	kvm_riscv_vcpu_host_fp_save(&vcpu->arch.host_context);
	kvm_riscv_vcpu_guest_fp_restore(&vcpu->arch.guest_context,
					vcpu->arch.isa);
	kvm_riscv_vcpu_host_vector_save(&vcpu->arch.host_context);
	kvm_riscv_vcpu_guest_vector_restore(&vcpu->arch.guest_context,
					    vcpu->arch.isa);

	kvm_riscv_vcpu_aia_load(vcpu, cpu);

	kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);

	vcpu->cpu = cpu;
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	void *nsh;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	vcpu->cpu = -1;

	kvm_riscv_vcpu_aia_put(vcpu);

	kvm_riscv_vcpu_guest_fp_save(&vcpu->arch.guest_context,
				     vcpu->arch.isa);
	kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context);

	kvm_riscv_vcpu_timer_save(vcpu);
	kvm_riscv_vcpu_guest_vector_save(&vcpu->arch.guest_context,
					 vcpu->arch.isa);
	kvm_riscv_vcpu_host_vector_restore(&vcpu->arch.host_context);

	if (kvm_riscv_nacl_available()) {
		nsh = nacl_shmem();
		csr->vsstatus = nacl_csr_read(nsh, CSR_VSSTATUS);
		csr->vsie = nacl_csr_read(nsh, CSR_VSIE);
		csr->vstvec = nacl_csr_read(nsh, CSR_VSTVEC);
		csr->vsscratch = nacl_csr_read(nsh, CSR_VSSCRATCH);
		csr->vsepc = nacl_csr_read(nsh, CSR_VSEPC);
		csr->vscause = nacl_csr_read(nsh, CSR_VSCAUSE);
		csr->vstval = nacl_csr_read(nsh, CSR_VSTVAL);
		csr->hvip = nacl_csr_read(nsh, CSR_HVIP);
		csr->vsatp = nacl_csr_read(nsh, CSR_VSATP);
	} else {
		csr->vsstatus = csr_read(CSR_VSSTATUS);
		csr->vsie = csr_read(CSR_VSIE);
		csr->vstvec = csr_read(CSR_VSTVEC);
		csr->vsscratch = csr_read(CSR_VSSCRATCH);
		csr->vsepc = csr_read(CSR_VSEPC);
		csr->vscause = csr_read(CSR_VSCAUSE);
		csr->vstval = csr_read(CSR_VSTVAL);
		csr->hvip = csr_read(CSR_HVIP);
		csr->vsatp = csr_read(CSR_VSATP);
	}
}

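/*
 * Handle pending vCPU requests before entering the guest. Called with the
 * vCPU SRCU read lock held; it is dropped temporarily while sleeping on
 * KVM_REQ_SLEEP.
 */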
static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
{
	struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);

	if (kvm_request_pending(vcpu)) {
		if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
			kvm_vcpu_srcu_read_unlock(vcpu);
			rcuwait_wait_event(wait,
				(!kvm_riscv_vcpu_stopped(vcpu)) && (!vcpu->arch.pause),
				TASK_INTERRUPTIBLE);
			kvm_vcpu_srcu_read_lock(vcpu);

			if (kvm_riscv_vcpu_stopped(vcpu) || vcpu->arch.pause) {
				/*
				 * We were woken up to handle a signal;
				 * request to sleep again later.
				 */
				kvm_make_request(KVM_REQ_SLEEP, vcpu);
			}
		}

		if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
			kvm_riscv_reset_vcpu(vcpu);

		if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu))
			kvm_riscv_gstage_update_hgatp(vcpu);

		if (kvm_check_request(KVM_REQ_FENCE_I, vcpu))
			kvm_riscv_fence_i_process(vcpu);

		/*
		 * The generic KVM_REQ_TLB_FLUSH is the same as
		 * KVM_REQ_HFENCE_GVMA_VMID_ALL
		 */
		if (kvm_check_request(KVM_REQ_HFENCE_GVMA_VMID_ALL, vcpu))
			kvm_riscv_hfence_gvma_vmid_all_process(vcpu);

		if (kvm_check_request(KVM_REQ_HFENCE_VVMA_ALL, vcpu))
			kvm_riscv_hfence_vvma_all_process(vcpu);

		if (kvm_check_request(KVM_REQ_HFENCE, vcpu))
			kvm_riscv_hfence_process(vcpu);

		if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
			kvm_riscv_vcpu_record_steal_time(vcpu);
	}
}

static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;

	ncsr_write(CSR_HVIP, csr->hvip);
	kvm_riscv_vcpu_aia_update_hvip(vcpu);
}

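/*
 * Swap the guest values of CSRs that are not handled by the main low-level
 * context switch (SCOUNTEREN, SENVCFG, and SSTATEEN0 when Smstateen is
 * available) into hardware, saving the host values.
 */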
static __always_inline void kvm_riscv_vcpu_swap_in_guest_state(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_smstateen_csr *smcsr = &vcpu->arch.smstateen_csr;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
	struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;

	vcpu->arch.host_scounteren = csr_swap(CSR_SCOUNTEREN, csr->scounteren);
	vcpu->arch.host_senvcfg = csr_swap(CSR_SENVCFG, csr->senvcfg);
	if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN) &&
	    (cfg->hstateen0 & SMSTATEEN0_SSTATEEN0))
		vcpu->arch.host_sstateen0 = csr_swap(CSR_SSTATEEN0,
						     smcsr->sstateen0);
}

static __always_inline void kvm_riscv_vcpu_swap_in_host_state(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_smstateen_csr *smcsr = &vcpu->arch.smstateen_csr;
	struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
	struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;

	csr->scounteren = csr_swap(CSR_SCOUNTEREN, vcpu->arch.host_scounteren);
	csr->senvcfg = csr_swap(CSR_SENVCFG, vcpu->arch.host_senvcfg);
	if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN) &&
	    (cfg->hstateen0 & SMSTATEEN0_SSTATEEN0))
		smcsr->sstateen0 = csr_swap(CSR_SSTATEEN0,
					    vcpu->arch.host_sstateen0);
}

/*
 * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
 * the vCPU is running.
 *
 * This must be noinstr as instrumentation may make use of RCU, and this is not
 * safe during the EQS.
 */
static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu,
					      struct kvm_cpu_trap *trap)
{
	void *nsh;
	struct kvm_cpu_context *gcntx = &vcpu->arch.guest_context;
	struct kvm_cpu_context *hcntx = &vcpu->arch.host_context;

	/*
	 * We save trap CSRs (such as SEPC, SCAUSE, STVAL, HTVAL, and
	 * HTINST) here because we do local_irq_enable() after this
	 * function in kvm_arch_vcpu_ioctl_run() which can result in
	 * an interrupt immediately after local_irq_enable() and can
	 * potentially change trap CSRs.
	 */

	kvm_riscv_vcpu_swap_in_guest_state(vcpu);
	guest_state_enter_irqoff();

	if (kvm_riscv_nacl_sync_sret_available()) {
		nsh = nacl_shmem();

		if (kvm_riscv_nacl_autoswap_csr_available()) {
			hcntx->hstatus =
				nacl_csr_read(nsh, CSR_HSTATUS);
			nacl_scratch_write_long(nsh,
						SBI_NACL_SHMEM_AUTOSWAP_OFFSET +
						SBI_NACL_SHMEM_AUTOSWAP_HSTATUS,
						gcntx->hstatus);
			nacl_scratch_write_long(nsh,
						SBI_NACL_SHMEM_AUTOSWAP_OFFSET,
						SBI_NACL_SHMEM_AUTOSWAP_FLAG_HSTATUS);
		} else if (kvm_riscv_nacl_sync_csr_available()) {
			hcntx->hstatus = nacl_csr_swap(nsh,
						       CSR_HSTATUS, gcntx->hstatus);
		} else {
			hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus);
		}

		nacl_scratch_write_longs(nsh,
					 SBI_NACL_SHMEM_SRET_OFFSET +
					 SBI_NACL_SHMEM_SRET_X(1),
					 &gcntx->ra,
					 SBI_NACL_SHMEM_SRET_X_LAST);

		__kvm_riscv_nacl_switch_to(&vcpu->arch, SBI_EXT_NACL,
					   SBI_EXT_NACL_SYNC_SRET);

		if (kvm_riscv_nacl_autoswap_csr_available()) {
			nacl_scratch_write_long(nsh,
						SBI_NACL_SHMEM_AUTOSWAP_OFFSET,
						0);
			gcntx->hstatus = nacl_scratch_read_long(nsh,
								SBI_NACL_SHMEM_AUTOSWAP_OFFSET +
								SBI_NACL_SHMEM_AUTOSWAP_HSTATUS);
		} else {
			gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus);
		}

		trap->htval = nacl_csr_read(nsh, CSR_HTVAL);
		trap->htinst = nacl_csr_read(nsh, CSR_HTINST);
	} else {
		hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus);

		__kvm_riscv_switch_to(&vcpu->arch);

		gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus);

		trap->htval = csr_read(CSR_HTVAL);
		trap->htinst = csr_read(CSR_HTINST);
	}

	trap->sepc = gcntx->sepc;
	trap->scause = csr_read(CSR_SCAUSE);
	trap->stval = csr_read(CSR_STVAL);

	vcpu->arch.last_exit_cpu = vcpu->cpu;
	guest_state_exit_irqoff();
	kvm_riscv_vcpu_swap_in_host_state(vcpu);
}

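/*
 * Main vCPU run loop: complete any MMIO/SBI/CSR exit that user space has just
 * handled, then repeatedly enter the guest until an exit needs user-space
 * attention (kvm_riscv_vcpu_exit() returns <= 0) or other work is pending.
 */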
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	int ret;
	struct kvm_cpu_trap trap;
	struct kvm_run *run = vcpu->run;

	if (!vcpu->arch.ran_atleast_once)
		kvm_riscv_vcpu_setup_config(vcpu);

	/* Mark this VCPU ran at least once */
	vcpu->arch.ran_atleast_once = true;

	kvm_vcpu_srcu_read_lock(vcpu);

	switch (run->exit_reason) {
	case KVM_EXIT_MMIO:
		/* Process MMIO value returned from user-space */
		ret = kvm_riscv_vcpu_mmio_return(vcpu, vcpu->run);
		break;
	case KVM_EXIT_RISCV_SBI:
		/* Process SBI value returned from user-space */
		ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run);
		break;
	case KVM_EXIT_RISCV_CSR:
		/* Process CSR value returned from user-space */
		ret = kvm_riscv_vcpu_csr_return(vcpu, vcpu->run);
		break;
	default:
		ret = 0;
		break;
	}
	if (ret) {
		kvm_vcpu_srcu_read_unlock(vcpu);
		return ret;
	}

	if (!vcpu->wants_to_run) {
		kvm_vcpu_srcu_read_unlock(vcpu);
		return -EINTR;
	}

	vcpu_load(vcpu);

	kvm_sigset_activate(vcpu);

	ret = 1;
	run->exit_reason = KVM_EXIT_UNKNOWN;
	while (ret > 0) {
		/* Check conditions before entering the guest */
		ret = xfer_to_guest_mode_handle_work(vcpu);
		if (ret)
			continue;
		ret = 1;

		kvm_riscv_gstage_vmid_update(vcpu);

		kvm_riscv_check_vcpu_requests(vcpu);

		preempt_disable();

		/* Update AIA HW state before entering guest */
		ret = kvm_riscv_vcpu_aia_update(vcpu);
		if (ret <= 0) {
			preempt_enable();
			continue;
		}

		local_irq_disable();

		/*
		 * Ensure we set mode to IN_GUEST_MODE after we disable
		 * interrupts and before the final VCPU requests check.
		 * See the comment in kvm_vcpu_exiting_guest_mode() and
		 * Documentation/virt/kvm/vcpu-requests.rst
		 */
		vcpu->mode = IN_GUEST_MODE;

		kvm_vcpu_srcu_read_unlock(vcpu);
		smp_mb__after_srcu_read_unlock();

		/*
		 * VCPU interrupts might have been updated asynchronously,
		 * so reflect them in HW.
		 */
		kvm_riscv_vcpu_flush_interrupts(vcpu);

		/* Update HVIP CSR for current CPU */
		kvm_riscv_update_hvip(vcpu);

		if (kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
		    kvm_request_pending(vcpu) ||
		    xfer_to_guest_mode_work_pending()) {
			vcpu->mode = OUTSIDE_GUEST_MODE;
			local_irq_enable();
			preempt_enable();
			kvm_vcpu_srcu_read_lock(vcpu);
			continue;
		}

		/*
		 * Cleanup stale TLB entries
		 *
		 * Note: This should be done after the G-stage VMID has been
		 * updated using kvm_riscv_gstage_vmid_ver_changed()
		 */
		kvm_riscv_local_tlb_sanitize(vcpu);

		trace_kvm_entry(vcpu);

		guest_timing_enter_irqoff();

		kvm_riscv_vcpu_enter_exit(vcpu, &trap);

		vcpu->mode = OUTSIDE_GUEST_MODE;
		vcpu->stat.exits++;

		/* Sync up interrupt state with HW */
		kvm_riscv_vcpu_sync_interrupts(vcpu);

		/*
		 * We must ensure that any pending interrupts are taken before
		 * we exit guest timing so that timer ticks are accounted as
		 * guest time. Transiently unmask interrupts so that any
		 * pending interrupts are taken.
		 *
		 * There's no barrier which ensures that pending interrupts are
		 * recognised, so we just hope that the CPU takes any pending
		 * interrupts between the enable and disable.
		 */
		local_irq_enable();
		local_irq_disable();

		guest_timing_exit_irqoff();

		local_irq_enable();

		trace_kvm_exit(&trap);

		preempt_enable();

		kvm_vcpu_srcu_read_lock(vcpu);

		ret = kvm_riscv_vcpu_exit(vcpu, run, &trap);
	}

	kvm_sigset_deactivate(vcpu);

	vcpu_put(vcpu);

	kvm_vcpu_srcu_read_unlock(vcpu);

	return ret;
}