// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 - Google LLC
 * Author: Quentin Perret <[email protected]>
 */

#include <linux/init.h>
#include <linux/kmemleak.h>
#include <linux/kvm_host.h>
#include <asm/kvm_mmu.h>
#include <linux/memblock.h>
#include <linux/mutex.h>
#include <linux/sort.h>

#include <asm/kvm_pkvm.h>

#include "hyp_constants.h"

DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);

static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);

phys_addr_t hyp_mem_base;
phys_addr_t hyp_mem_size;

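/* Order memblock regions by ascending base address. */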
static int cmp_hyp_memblock(const void *p1, const void *p2)
{
	const struct memblock_region *r1 = p1;
	const struct memblock_region *r2 = p2;

	return r1->base < r2->base ? -1 : (r1->base > r2->base);
}

static void __init sort_memblock_regions(void)
{
	sort(hyp_memory,
	     *hyp_memblock_nr_ptr,
	     sizeof(struct memblock_region),
	     cmp_hyp_memblock,
	     NULL);
}

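/*
 * Copy the host memblock regions into the hypervisor's private array (bounded
 * by HYP_MEMBLOCK_REGIONS) and sort them by base address.
 */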
static int __init register_memblock_regions(void)
{
	struct memblock_region *reg;

	for_each_mem_region(reg) {
		if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
			return -ENOMEM;

		hyp_memory[*hyp_memblock_nr_ptr] = *reg;
		(*hyp_memblock_nr_ptr)++;
	}
	sort_memblock_regions();

	return 0;
}

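/*
 * Reserve, at boot, the physical memory the pKVM hypervisor will need for its
 * own stage-1 page-tables, the host stage-2, the VM table, the hyp vmemmap and
 * the FF-A proxy buffers. Only relevant when booting in protected mode.
 */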
void __init kvm_hyp_reserve(void)
{
	u64 hyp_mem_pages = 0;
	int ret;

	if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
		return;

	if (kvm_get_mode() != KVM_MODE_PROTECTED)
		return;

	ret = register_memblock_regions();
	if (ret) {
		*hyp_memblock_nr_ptr = 0;
		kvm_err("Failed to register hyp memblocks: %d\n", ret);
		return;
	}

	hyp_mem_pages += hyp_s1_pgtable_pages();
	hyp_mem_pages += host_s2_pgtable_pages();
	hyp_mem_pages += hyp_vm_table_pages();
	hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
	hyp_mem_pages += hyp_ffa_proxy_pages();

	/*
	 * Try to allocate a PMD-aligned region to reduce TLB pressure once
	 * this is unmapped from the host stage-2, and fall back to PAGE_SIZE.
	 */
	hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
	hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE),
					   PMD_SIZE);
	if (!hyp_mem_base)
		hyp_mem_base = memblock_phys_alloc(hyp_mem_size, PAGE_SIZE);
	else
		hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);

	if (!hyp_mem_base) {
		kvm_err("Failed to reserve hyp memory\n");
		return;
	}

	kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
		 hyp_mem_base);
}

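/*
 * Tear down the EL2 copy of the VM, then free anything the hypervisor handed
 * back via the teardown memcache. Safe to call on a VM that was never created
 * at hyp (the handle is then zero).
 */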
static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	if (host_kvm->arch.pkvm.handle) {
		WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
					  host_kvm->arch.pkvm.handle));
	}

	host_kvm->arch.pkvm.handle = 0;
	free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
}

/*
 * Allocates and donates memory for hypervisor VM structs at EL2.
 *
 * Allocates space for the VM state, which includes the hyp vm as well as
 * the hyp vcpus.
 *
 * Stores an opaque handle in the kvm struct for future reference.
 *
 * Return 0 on success, negative error code on failure.
 */
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	size_t pgd_sz, hyp_vm_sz, hyp_vcpu_sz;
	struct kvm_vcpu *host_vcpu;
	pkvm_handle_t handle;
	void *pgd, *hyp_vm;
	unsigned long idx;
	int ret;

	if (host_kvm->created_vcpus < 1)
		return -EINVAL;

	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr);

	/*
	 * The PGD pages will be reclaimed using a hyp_memcache which implies
	 * page granularity. So, use alloc_pages_exact() to get individual
	 * refcounts.
	 */
	pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
	if (!pgd)
		return -ENOMEM;

	/* Allocate memory to donate to hyp for vm and vcpu pointers. */
	hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
					size_mul(sizeof(void *),
						 host_kvm->created_vcpus)));
	hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
	if (!hyp_vm) {
		ret = -ENOMEM;
		goto free_pgd;
	}

	/* Donate the VM memory to hyp and let hyp initialize it. */
	ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd);
	if (ret < 0)
		goto free_vm;

	handle = ret;

	host_kvm->arch.pkvm.handle = handle;

	/* Donate memory for the vcpus at hyp and initialize it. */
	hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
	kvm_for_each_vcpu(idx, host_vcpu, host_kvm) {
		void *hyp_vcpu;

		/* Indexing of the vcpus to be sequential starting at 0. */
		if (WARN_ON(host_vcpu->vcpu_idx != idx)) {
			ret = -EINVAL;
			goto destroy_vm;
		}

		hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
		if (!hyp_vcpu) {
			ret = -ENOMEM;
			goto destroy_vm;
		}

		ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu,
					hyp_vcpu);
		if (ret) {
			free_pages_exact(hyp_vcpu, hyp_vcpu_sz);
			goto destroy_vm;
		}
	}

	return 0;

destroy_vm:
	__pkvm_destroy_hyp_vm(host_kvm);
	return ret;
free_vm:
	free_pages_exact(hyp_vm, hyp_vm_sz);
free_pgd:
	free_pages_exact(pgd, pgd_sz);
	return ret;
}

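/* Create the hyp VM on first use; the config_lock serializes callers. */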
int pkvm_create_hyp_vm(struct kvm *host_kvm)
{
	int ret = 0;

	mutex_lock(&host_kvm->arch.config_lock);
	if (!host_kvm->arch.pkvm.handle)
		ret = __pkvm_create_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->arch.config_lock);

	return ret;
}

void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
{
	mutex_lock(&host_kvm->arch.config_lock);
	__pkvm_destroy_hyp_vm(host_kvm);
	mutex_unlock(&host_kvm->arch.config_lock);
}

int pkvm_init_host_vm(struct kvm *host_kvm)
{
	return 0;
}

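/* Per-CPU callback: finalize the hyp protections on this CPU and record any failure. */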
static void __init _kvm_host_prot_finalize(void *arg)
{
	int *err = arg;

	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
		WRITE_ONCE(*err, -EINVAL);
}

static int __init pkvm_drop_host_privileges(void)
{
	int ret = 0;

	/*
	 * Flip the static key upfront as that may no longer be possible
	 * once the host stage 2 is installed.
	 */
	static_branch_enable(&kvm_protected_mode_initialized);
	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
	return ret;
}

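/*
 * Final step of the pKVM bring-up, run as a device_initcall_sync() once KVM
 * itself has initialised: hide the hyp sections from kmemleak and de-privilege
 * the host.
 */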
static int __init finalize_pkvm(void)
{
	int ret;

	if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised())
		return 0;

	/*
	 * Exclude HYP sections from kmemleak so that they don't get peeked
	 * at, which would end badly once inaccessible.
	 */
	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
	kmemleak_free_part(__hyp_rodata_start, __hyp_rodata_end - __hyp_rodata_start);
	kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);

	ret = pkvm_drop_host_privileges();
	if (ret)
		pr_err("Failed to finalize Hyp protection: %d\n", ret);

	return ret;
}
device_initcall_sync(finalize_pkvm);

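/* rb_find_add() comparator: order pkvm_mappings by guest frame number. */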
static int cmp_mappings(struct rb_node *node, const struct rb_node *parent)
{
	struct pkvm_mapping *a = rb_entry(node, struct pkvm_mapping, node);
	struct pkvm_mapping *b = rb_entry(parent, struct pkvm_mapping, node);

	if (a->gfn < b->gfn)
		return -1;
	if (a->gfn > b->gfn)
		return 1;
	return 0;
}

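/*
 * Return the node matching @gfn or, failing that, the last node visited on the
 * search path. The latter is close to @gfn and is used as a starting point by
 * for_each_mapping_in_range_safe().
 */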
static struct rb_node *find_first_mapping_node(struct rb_root *root, u64 gfn)
{
	struct rb_node *node = root->rb_node, *prev = NULL;
	struct pkvm_mapping *mapping;

	while (node) {
		mapping = rb_entry(node, struct pkvm_mapping, node);
		if (mapping->gfn == gfn)
			return node;
		prev = node;
		node = (gfn < mapping->gfn) ? node->rb_left : node->rb_right;
	}

	return prev;
}

/*
 * __tmp is updated to rb_next(__tmp) *before* entering the body of the loop to allow freeing
 * of __map inline.
 */
#define for_each_mapping_in_range_safe(__pgt, __start, __end, __map)			\
	for (struct rb_node *__tmp = find_first_mapping_node(&(__pgt)->pkvm_mappings,	\
							     ((__start) >> PAGE_SHIFT));\
	     __tmp && ({								\
			__map = rb_entry(__tmp, struct pkvm_mapping, node);		\
			__tmp = rb_next(__tmp);						\
			true;								\
		       });								\
	    )										\
		if (__map->gfn < ((__start) >> PAGE_SHIFT))				\
			continue;							\
		else if (__map->gfn >= ((__end) >> PAGE_SHIFT))				\
			break;								\
		else

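/*
 * The pkvm_pgtable_stage2_*() helpers below back the stage-2 kvm_pgtable API
 * in protected mode: the host never walks the guest page-tables directly, it
 * tracks shared pages in an rb-tree keyed by gfn and proxies every operation
 * to EL2 through hypercalls.
 */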
int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
			     struct kvm_pgtable_mm_ops *mm_ops)
{
	pgt->pkvm_mappings = RB_ROOT;
	pgt->mmu = mmu;

	return 0;
}

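/* Unshare every page still mapped in the guest and free the tracking nodes. */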
void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	struct rb_node *node;

	if (!handle)
		return;

	node = rb_first(&pgt->pkvm_mappings);
	while (node) {
		mapping = rb_entry(node, struct pkvm_mapping, node);
		kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
		node = rb_next(node);
		rb_erase(&mapping->node, &pgt->pkvm_mappings);
		kfree(mapping);
	}
}

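/*
 * Share a single page with the guest: ask EL2 to map pfn at gfn, then record
 * the mapping in the rb-tree using the pre-allocated node carried by the hyp
 * memcache. -EAGAIN is returned when a racing vCPU has already mapped the gfn.
 */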
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
			    u64 phys, enum kvm_pgtable_prot prot,
			    void *mc, enum kvm_pgtable_walk_flags flags)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	struct pkvm_mapping *mapping = NULL;
	struct kvm_hyp_memcache *cache = mc;
	u64 gfn = addr >> PAGE_SHIFT;
	u64 pfn = phys >> PAGE_SHIFT;
	int ret;

	if (size != PAGE_SIZE)
		return -EINVAL;

	lockdep_assert_held_write(&kvm->mmu_lock);
	ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, prot);
	if (ret) {
		/* Is the gfn already mapped due to a racing vCPU? */
		if (ret == -EPERM)
			return -EAGAIN;
	}

	swap(mapping, cache->mapping);
	mapping->gfn = gfn;
	mapping->pfn = pfn;
	WARN_ON(rb_find_add(&mapping->node, &pgt->pkvm_mappings, cmp_mappings));

	return ret;
}

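/* Unshare the pages in [addr, addr + size) from the guest and drop their tracking nodes. */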
int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	int ret = 0;

	lockdep_assert_held_write(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
		if (WARN_ON(ret))
			break;
		rb_erase(&mapping->node, &pgt->pkvm_mappings);
		kfree(mapping);
	}

	return ret;
}

int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	int ret = 0;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn);
		if (WARN_ON(ret))
			break;
	}

	return ret;
}

int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	struct pkvm_mapping *mapping;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
		__clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn), PAGE_SIZE);

	return 0;
}

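/* Query (and, if @mkold, clear) the access flag of every mapping in the range. */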
bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
	pkvm_handle_t handle = kvm->arch.pkvm.handle;
	struct pkvm_mapping *mapping;
	bool young = false;

	lockdep_assert_held(&kvm->mmu_lock);
	for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
		young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
					   mkold);

	return young;
}

int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
				    enum kvm_pgtable_walk_flags flags)
{
	return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot);
}

void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
				 enum kvm_pgtable_walk_flags flags)
{
	WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT));
}

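/*
 * The remaining kvm_pgtable operations are not supported in protected mode and
 * should never be reached, hence the WARN_ON_ONCE() stubs.
 */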
void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
{
	WARN_ON_ONCE(1);
}

kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
					       enum kvm_pgtable_prot prot, void *mc, bool force_pte)
{
	WARN_ON_ONCE(1);
	return NULL;
}

int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
			      struct kvm_mmu_memory_cache *mc)
{
	WARN_ON_ONCE(1);
	return -EINVAL;
}