1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
4 * Author: Joerg Roedel <[email protected]>
5 * Leo Duran <[email protected]>
6 */
7
8 #define pr_fmt(fmt) "AMD-Vi: " fmt
9 #define dev_fmt(fmt) pr_fmt(fmt)
10
11 #include <linux/pci.h>
12 #include <linux/acpi.h>
13 #include <linux/list.h>
14 #include <linux/bitmap.h>
15 #include <linux/slab.h>
16 #include <linux/syscore_ops.h>
17 #include <linux/interrupt.h>
18 #include <linux/msi.h>
19 #include <linux/irq.h>
20 #include <linux/amd-iommu.h>
21 #include <linux/export.h>
22 #include <linux/kmemleak.h>
23 #include <linux/cc_platform.h>
24 #include <linux/iopoll.h>
25 #include <asm/pci-direct.h>
26 #include <asm/iommu.h>
27 #include <asm/apic.h>
28 #include <asm/gart.h>
29 #include <asm/x86_init.h>
30 #include <asm/io_apic.h>
31 #include <asm/irq_remapping.h>
32 #include <asm/set_memory.h>
33 #include <asm/sev.h>
34
35 #include <linux/crash_dump.h>
36
37 #include "amd_iommu.h"
38 #include "../irq_remapping.h"
39 #include "../iommu-pages.h"
40
41 /*
42 * definitions for the ACPI scanning code
43 */
44 #define IVRS_HEADER_LENGTH 48
45
46 #define ACPI_IVHD_TYPE_MAX_SUPPORTED 0x40
47 #define ACPI_IVMD_TYPE_ALL 0x20
48 #define ACPI_IVMD_TYPE 0x21
49 #define ACPI_IVMD_TYPE_RANGE 0x22
50
51 #define IVHD_DEV_ALL 0x01
52 #define IVHD_DEV_SELECT 0x02
53 #define IVHD_DEV_SELECT_RANGE_START 0x03
54 #define IVHD_DEV_RANGE_END 0x04
55 #define IVHD_DEV_ALIAS 0x42
56 #define IVHD_DEV_ALIAS_RANGE 0x43
57 #define IVHD_DEV_EXT_SELECT 0x46
58 #define IVHD_DEV_EXT_SELECT_RANGE 0x47
59 #define IVHD_DEV_SPECIAL 0x48
60 #define IVHD_DEV_ACPI_HID 0xf0
61
62 #define UID_NOT_PRESENT 0
63 #define UID_IS_INTEGER 1
64 #define UID_IS_CHARACTER 2
65
66 #define IVHD_SPECIAL_IOAPIC 1
67 #define IVHD_SPECIAL_HPET 2
68
69 #define IVHD_FLAG_HT_TUN_EN_MASK 0x01
70 #define IVHD_FLAG_PASSPW_EN_MASK 0x02
71 #define IVHD_FLAG_RESPASSPW_EN_MASK 0x04
72 #define IVHD_FLAG_ISOC_EN_MASK 0x08
73
74 #define IVMD_FLAG_EXCL_RANGE 0x08
75 #define IVMD_FLAG_IW 0x04
76 #define IVMD_FLAG_IR 0x02
77 #define IVMD_FLAG_UNITY_MAP 0x01
78
79 #define ACPI_DEVFLAG_INITPASS 0x01
80 #define ACPI_DEVFLAG_EXTINT 0x02
81 #define ACPI_DEVFLAG_NMI 0x04
82 #define ACPI_DEVFLAG_SYSMGT1 0x10
83 #define ACPI_DEVFLAG_SYSMGT2 0x20
84 #define ACPI_DEVFLAG_LINT0 0x40
85 #define ACPI_DEVFLAG_LINT1 0x80
86 #define ACPI_DEVFLAG_ATSDIS 0x10000000
87
88 #define IVRS_GET_SBDF_ID(seg, bus, dev, fn) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \
89 | ((dev & 0x1f) << 3) | (fn & 0x7))
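/*
 * Worked example (illustrative values only): IVRS_GET_SBDF_ID(0x0001, 0xc0, 0x1f, 0x7)
 * packs segment 0x0001, bus 0xc0, device 0x1f, function 7 into 0x0001c0ff.
 */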
90
91 /*
92 * ACPI table definitions
93 *
94 * These data structures are laid over the table to parse the important values
95 * out of it.
96 */
97
98 /*
99 * structure describing one IOMMU in the ACPI table. Typically followed by one
100 * or more ivhd_entry structures.
101 */
102 struct ivhd_header {
103 u8 type;
104 u8 flags;
105 u16 length;
106 u16 devid;
107 u16 cap_ptr;
108 u64 mmio_phys;
109 u16 pci_seg;
110 u16 info;
111 u32 efr_attr;
112
113 /* Following only valid on IVHD type 11h and 40h */
114 u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */
115 u64 efr_reg2;
116 } __attribute__((packed));
117
118 /*
119 * A device entry describing which devices a specific IOMMU translates and
120 * which requestor ids they use.
121 */
122 struct ivhd_entry {
123 u8 type;
124 u16 devid;
125 u8 flags;
126 struct_group(ext_hid,
127 u32 ext;
128 u32 hidh;
129 );
130 u64 cid;
131 u8 uidf;
132 u8 uidl;
133 u8 uid;
134 } __attribute__((packed));
135
136 /*
137 * An AMD IOMMU memory definition structure. It defines things like exclusion
138 * ranges for devices and regions that should be unity mapped.
139 */
140 struct ivmd_header {
141 u8 type;
142 u8 flags;
143 u16 length;
144 u16 devid;
145 u16 aux;
146 u16 pci_seg;
147 u8 resv[6];
148 u64 range_start;
149 u64 range_length;
150 } __attribute__((packed));
151
152 bool amd_iommu_dump;
153 bool amd_iommu_irq_remap __read_mostly;
154
155 enum protection_domain_mode amd_iommu_pgtable = PD_MODE_V1;
156 /* Guest page table level */
157 int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL;
158
159 int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
160 static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
161
162 static bool amd_iommu_detected;
163 static bool amd_iommu_disabled __initdata;
164 static bool amd_iommu_force_enable __initdata;
165 static bool amd_iommu_irtcachedis;
166 static int amd_iommu_target_ivhd_type;
167
168 /* Global EFR and EFR2 registers */
169 u64 amd_iommu_efr;
170 u64 amd_iommu_efr2;
171
172 /* SNP is enabled on the system? */
173 bool amd_iommu_snp_en;
174 EXPORT_SYMBOL(amd_iommu_snp_en);
175
176 LIST_HEAD(amd_iommu_pci_seg_list); /* list of all PCI segments */
177 LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */
178 LIST_HEAD(amd_ivhd_dev_flags_list); /* list of all IVHD device entry settings */
179
180 /* Number of IOMMUs present in the system */
181 static int amd_iommus_present;
182
183 /* IOMMUs have a non-present cache? */
184 bool amd_iommu_np_cache __read_mostly;
185 bool amd_iommu_iotlb_sup __read_mostly = true;
186
187 static bool amd_iommu_pc_present __read_mostly;
188 bool amdr_ivrs_remap_support __read_mostly;
189
190 bool amd_iommu_force_isolation __read_mostly;
191
192 unsigned long amd_iommu_pgsize_bitmap __ro_after_init = AMD_IOMMU_PGSIZES;
193
194 enum iommu_init_state {
195 IOMMU_START_STATE,
196 IOMMU_IVRS_DETECTED,
197 IOMMU_ACPI_FINISHED,
198 IOMMU_ENABLED,
199 IOMMU_PCI_INIT,
200 IOMMU_INTERRUPTS_EN,
201 IOMMU_INITIALIZED,
202 IOMMU_NOT_FOUND,
203 IOMMU_INIT_ERROR,
204 IOMMU_CMDLINE_DISABLED,
205 };
206
207 /* Early ioapic and hpet maps from kernel command line */
208 #define EARLY_MAP_SIZE 4
209 static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE];
210 static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE];
211 static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE];
212
213 static int __initdata early_ioapic_map_size;
214 static int __initdata early_hpet_map_size;
215 static int __initdata early_acpihid_map_size;
216
217 static bool __initdata cmdline_maps;
218
219 static enum iommu_init_state init_state = IOMMU_START_STATE;
220
221 static int amd_iommu_enable_interrupts(void);
222 static int __init iommu_go_to_state(enum iommu_init_state state);
223 static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg);
224
225 static bool amd_iommu_pre_enabled = true;
226
227 static u32 amd_iommu_ivinfo __initdata;
228
229 bool translation_pre_enabled(struct amd_iommu *iommu)
230 {
231 return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
232 }
233
234 static void clear_translation_pre_enabled(struct amd_iommu *iommu)
235 {
236 iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
237 }
238
239 static void init_translation_status(struct amd_iommu *iommu)
240 {
241 u64 ctrl;
242
243 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
244 if (ctrl & (1<<CONTROL_IOMMU_EN))
245 iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
246 }
247
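/*
 * tbl_size() below rounds a per-segment table up to a power-of-two number of
 * pages, e.g. 32-byte device table entries with last_bdf = 0xffff yield a
 * 2 MiB table (illustrative figures, assuming 32-byte entries).
 */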
248 static inline unsigned long tbl_size(int entry_size, int last_bdf)
249 {
250 unsigned shift = PAGE_SHIFT +
251 get_order((last_bdf + 1) * entry_size);
252
253 return 1UL << shift;
254 }
255
256 int amd_iommu_get_num_iommus(void)
257 {
258 return amd_iommus_present;
259 }
260
261 /*
262 * Iterate through all the IOMMUs to get common EFR
263 * masks among all IOMMUs and warn if an inconsistency is found.
264 */
265 static __init void get_global_efr(void)
266 {
267 struct amd_iommu *iommu;
268
269 for_each_iommu(iommu) {
270 u64 tmp = iommu->features;
271 u64 tmp2 = iommu->features2;
272
273 if (list_is_first(&iommu->list, &amd_iommu_list)) {
274 amd_iommu_efr = tmp;
275 amd_iommu_efr2 = tmp2;
276 continue;
277 }
278
279 if (amd_iommu_efr == tmp &&
280 amd_iommu_efr2 == tmp2)
281 continue;
282
283 pr_err(FW_BUG
284 "Found inconsistent EFR/EFR2 %#llx,%#llx (global %#llx,%#llx) on iommu%d (%04x:%02x:%02x.%01x).\n",
285 tmp, tmp2, amd_iommu_efr, amd_iommu_efr2,
286 iommu->index, iommu->pci_seg->id,
287 PCI_BUS_NUM(iommu->devid), PCI_SLOT(iommu->devid),
288 PCI_FUNC(iommu->devid));
289
290 amd_iommu_efr &= tmp;
291 amd_iommu_efr2 &= tmp2;
292 }
293
294 pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2);
295 }
296
297 /*
298 * For IVHD type 0x11/0x40, EFR is also available via IVHD.
299 * Default to IVHD EFR since it is available sooner
300 * (i.e. before PCI init).
301 */
302 static void __init early_iommu_features_init(struct amd_iommu *iommu,
303 struct ivhd_header *h)
304 {
305 if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) {
306 iommu->features = h->efr_reg;
307 iommu->features2 = h->efr_reg2;
308 }
309 if (amd_iommu_ivinfo & IOMMU_IVINFO_DMA_REMAP)
310 amdr_ivrs_remap_support = true;
311 }
312
313 /* Access to l1 and l2 indexed register spaces */
314
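/*
 * The L1/L2 banks are reached indirectly through the IOMMU's PCI config space:
 * an index register (0xf8 for L1, 0xf0 for L2) selects the target register and
 * a data register (0xfc resp. 0xf4) carries the value.
 */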
315 static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
316 {
317 u32 val;
318
319 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
320 pci_read_config_dword(iommu->dev, 0xfc, &val);
321 return val;
322 }
323
324 static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
325 {
326 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
327 pci_write_config_dword(iommu->dev, 0xfc, val);
328 pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
329 }
330
331 static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
332 {
333 u32 val;
334
335 pci_write_config_dword(iommu->dev, 0xf0, address);
336 pci_read_config_dword(iommu->dev, 0xf4, &val);
337 return val;
338 }
339
340 static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
341 {
342 pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
343 pci_write_config_dword(iommu->dev, 0xf4, val);
344 }
345
346 /****************************************************************************
347 *
348 * AMD IOMMU MMIO register space handling functions
349 *
350 * These functions are used to program the IOMMU device registers in
351 * MMIO space as required by this driver.
352 *
353 ****************************************************************************/
354
355 /*
356 * This function sets the exclusion range in the IOMMU. DMA accesses to the
357 * exclusion range are passed through untranslated
358 */
359 static void iommu_set_exclusion_range(struct amd_iommu *iommu)
360 {
361 u64 start = iommu->exclusion_start & PAGE_MASK;
362 u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
363 u64 entry;
364
365 if (!iommu->exclusion_start)
366 return;
367
368 entry = start | MMIO_EXCL_ENABLE_MASK;
369 memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
370 &entry, sizeof(entry));
371
372 entry = limit;
373 memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
374 &entry, sizeof(entry));
375 }
376
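/*
 * Program the completion-wait write-back (CWWB) semaphore range. This is only
 * done when the SNP feature is present, in which case the exclusion base/limit
 * registers are re-purposed for the write-back region.
 */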
377 static void iommu_set_cwwb_range(struct amd_iommu *iommu)
378 {
379 u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem);
380 u64 entry = start & PM_ADDR_MASK;
381
382 if (!check_feature(FEATURE_SNP))
383 return;
384
385 /* Note:
386 * Re-purpose Exclusion base/limit registers for Completion wait
387 * write-back base/limit.
388 */
389 memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
390 &entry, sizeof(entry));
391
392 /* Note:
393 * Default to 4 Kbytes, which can be specified by setting base
394 * address equal to the limit address.
395 */
396 memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
397 &entry, sizeof(entry));
398 }
399
400 /* Programs the physical address of the device table into the IOMMU hardware */
401 static void iommu_set_device_table(struct amd_iommu *iommu)
402 {
403 u64 entry;
404 u32 dev_table_size = iommu->pci_seg->dev_table_size;
405 void *dev_table = (void *)get_dev_table(iommu);
406
407 BUG_ON(iommu->mmio_base == NULL);
408
409 entry = iommu_virt_to_phys(dev_table);
410 entry |= (dev_table_size >> 12) - 1;
411 memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
412 &entry, sizeof(entry));
413 }
414
415 /* Generic functions to enable/disable certain features of the IOMMU. */
416 void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
417 {
418 u64 ctrl;
419
420 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
421 ctrl |= (1ULL << bit);
422 writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
423 }
424
425 static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
426 {
427 u64 ctrl;
428
429 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
430 ctrl &= ~(1ULL << bit);
431 writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
432 }
433
434 static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout)
435 {
436 u64 ctrl;
437
438 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
439 ctrl &= ~CTRL_INV_TO_MASK;
440 ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;
441 writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
442 }
443
444 /* Function to enable the hardware */
445 static void iommu_enable(struct amd_iommu *iommu)
446 {
447 iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
448 }
449
450 static void iommu_disable(struct amd_iommu *iommu)
451 {
452 if (!iommu->mmio_base)
453 return;
454
455 /* Disable command buffer */
456 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
457
458 /* Disable event logging and event interrupts */
459 iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
460 iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
461
462 /* Disable IOMMU GA_LOG */
463 iommu_feature_disable(iommu, CONTROL_GALOG_EN);
464 iommu_feature_disable(iommu, CONTROL_GAINT_EN);
465
466 /* Disable IOMMU PPR logging */
467 iommu_feature_disable(iommu, CONTROL_PPRLOG_EN);
468 iommu_feature_disable(iommu, CONTROL_PPRINT_EN);
469
470 /* Disable IOMMU hardware itself */
471 iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
472
473 /* Clear IRTE cache disabling bit */
474 iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
475 }
476
477 /*
478 * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
479 * the system has one.
480 */
481 static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end)
482 {
483 if (!request_mem_region(address, end, "amd_iommu")) {
484 pr_err("Can not reserve memory region %llx-%llx for mmio\n",
485 address, end);
486 pr_err("This is a BIOS bug. Please contact your hardware vendor\n");
487 return NULL;
488 }
489
490 return (u8 __iomem *)ioremap(address, end);
491 }
492
493 static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
494 {
495 if (iommu->mmio_base)
496 iounmap(iommu->mmio_base);
497 release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end);
498 }
499
500 static inline u32 get_ivhd_header_size(struct ivhd_header *h)
501 {
502 u32 size = 0;
503
504 switch (h->type) {
505 case 0x10:
506 size = 24;
507 break;
508 case 0x11:
509 case 0x40:
510 size = 40;
511 break;
512 }
513 return size;
514 }
515
516 /****************************************************************************
517 *
518 * The functions below belong to the first pass of AMD IOMMU ACPI table
519 * parsing. In this pass we try to find out the highest device id this
520 * code has to handle. Based on this information, the size of the shared data
521 * structures is determined later.
522 *
523 ****************************************************************************/
524
525 /*
526 * This function calculates the length of a given IVHD entry
527 */
528 static inline int ivhd_entry_length(u8 *ivhd)
529 {
530 u32 type = ((struct ivhd_entry *)ivhd)->type;
531
532 if (type < 0x80) {
533 return 0x04 << (*ivhd >> 6);
534 } else if (type == IVHD_DEV_ACPI_HID) {
535 /* For ACPI_HID, offset 21 is uid len */
536 return *((u8 *)ivhd + 21) + 22;
537 }
538 return 0;
539 }
540
541 /*
542 * After reading the highest device id from the IOMMU PCI capability header
543 * this function checks whether a higher device id is defined in the ACPI table.
544 */
545 static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
546 {
547 u8 *p = (void *)h, *end = (void *)h;
548 struct ivhd_entry *dev;
549 int last_devid = -EINVAL;
550
551 u32 ivhd_size = get_ivhd_header_size(h);
552
553 if (!ivhd_size) {
554 pr_err("Unsupported IVHD type %#x\n", h->type);
555 return -EINVAL;
556 }
557
558 p += ivhd_size;
559 end += h->length;
560
561 while (p < end) {
562 dev = (struct ivhd_entry *)p;
563 switch (dev->type) {
564 case IVHD_DEV_ALL:
565 /* Use maximum BDF value for DEV_ALL */
566 return 0xffff;
567 case IVHD_DEV_SELECT:
568 case IVHD_DEV_RANGE_END:
569 case IVHD_DEV_ALIAS:
570 case IVHD_DEV_EXT_SELECT:
571 /* all the above subfield types refer to device ids */
572 if (dev->devid > last_devid)
573 last_devid = dev->devid;
574 break;
575 default:
576 break;
577 }
578 p += ivhd_entry_length(p);
579 }
580
581 WARN_ON(p != end);
582
583 return last_devid;
584 }
585
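/* Verify the ACPI checksum: all bytes of the IVRS table must sum to zero (mod 256). */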
586 static int __init check_ivrs_checksum(struct acpi_table_header *table)
587 {
588 int i;
589 u8 checksum = 0, *p = (u8 *)table;
590
591 for (i = 0; i < table->length; ++i)
592 checksum += p[i];
593 if (checksum != 0) {
594 /* ACPI table corrupt */
595 pr_err(FW_BUG "IVRS invalid checksum\n");
596 return -ENODEV;
597 }
598
599 return 0;
600 }
601
602 /*
603 * Iterate over all IVHD entries in the ACPI table and find the highest device
604 * id which we need to handle. This is the first of three functions which parse
605 * the ACPI table. So we check the checksum here.
606 */
607 static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_seg)
608 {
609 u8 *p = (u8 *)table, *end = (u8 *)table;
610 struct ivhd_header *h;
611 int last_devid, last_bdf = 0;
612
613 p += IVRS_HEADER_LENGTH;
614
615 end += table->length;
616 while (p < end) {
617 h = (struct ivhd_header *)p;
618 if (h->pci_seg == pci_seg &&
619 h->type == amd_iommu_target_ivhd_type) {
620 last_devid = find_last_devid_from_ivhd(h);
621
622 if (last_devid < 0)
623 return -EINVAL;
624 if (last_devid > last_bdf)
625 last_bdf = last_devid;
626 }
627 p += h->length;
628 }
629 WARN_ON(p != end);
630
631 return last_bdf;
632 }
633
634 /****************************************************************************
635 *
636 * The following functions belong to the code path which parses the ACPI table
637 * the second time. In this ACPI parsing iteration we allocate IOMMU specific
638 * data structures, initialize the per PCI segment device/alias/rlookup table
639 * and also basically initialize the hardware.
640 *
641 ****************************************************************************/
642
643 /* Allocate per PCI segment device table */
644 static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg)
645 {
646 pci_seg->dev_table = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32,
647 get_order(pci_seg->dev_table_size));
648 if (!pci_seg->dev_table)
649 return -ENOMEM;
650
651 return 0;
652 }
653
654 static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg)
655 {
656 iommu_free_pages(pci_seg->dev_table,
657 get_order(pci_seg->dev_table_size));
658 pci_seg->dev_table = NULL;
659 }
660
661 /* Allocate per PCI segment IOMMU rlookup table. */
662 static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
663 {
664 pci_seg->rlookup_table = iommu_alloc_pages(GFP_KERNEL,
665 get_order(pci_seg->rlookup_table_size));
666 if (pci_seg->rlookup_table == NULL)
667 return -ENOMEM;
668
669 return 0;
670 }
671
672 static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
673 {
674 iommu_free_pages(pci_seg->rlookup_table,
675 get_order(pci_seg->rlookup_table_size));
676 pci_seg->rlookup_table = NULL;
677 }
678
679 static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
680 {
681 pci_seg->irq_lookup_table = iommu_alloc_pages(GFP_KERNEL,
682 get_order(pci_seg->rlookup_table_size));
683 kmemleak_alloc(pci_seg->irq_lookup_table,
684 pci_seg->rlookup_table_size, 1, GFP_KERNEL);
685 if (pci_seg->irq_lookup_table == NULL)
686 return -ENOMEM;
687
688 return 0;
689 }
690
691 static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
692 {
693 kmemleak_free(pci_seg->irq_lookup_table);
694 iommu_free_pages(pci_seg->irq_lookup_table,
695 get_order(pci_seg->rlookup_table_size));
696 pci_seg->irq_lookup_table = NULL;
697 }
698
699 static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg)
700 {
701 int i;
702
703 pci_seg->alias_table = iommu_alloc_pages(GFP_KERNEL,
704 get_order(pci_seg->alias_table_size));
705 if (!pci_seg->alias_table)
706 return -ENOMEM;
707
708 /*
709 * let all alias entries point to themselves
710 */
711 for (i = 0; i <= pci_seg->last_bdf; ++i)
712 pci_seg->alias_table[i] = i;
713
714 return 0;
715 }
716
717 static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg)
718 {
719 iommu_free_pages(pci_seg->alias_table,
720 get_order(pci_seg->alias_table_size));
721 pci_seg->alias_table = NULL;
722 }
723
724 /*
725 * Allocates the command buffer. This buffer is per AMD IOMMU. We can
726 * write commands to that buffer later and the IOMMU will execute them
727 * asynchronously
728 */
729 static int __init alloc_command_buffer(struct amd_iommu *iommu)
730 {
731 iommu->cmd_buf = iommu_alloc_pages(GFP_KERNEL,
732 get_order(CMD_BUFFER_SIZE));
733
734 return iommu->cmd_buf ? 0 : -ENOMEM;
735 }
736
737 /*
738 * The interrupt handler has processed all pending events and adjusted the
739 * head and tail pointers. Reset the overflow mask and restart logging.
740 */
741 void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type,
742 u8 cntrl_intr, u8 cntrl_log,
743 u32 status_run_mask, u32 status_overflow_mask)
744 {
745 u32 status;
746
747 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
748 if (status & status_run_mask)
749 return;
750
751 pr_info_ratelimited("IOMMU %s log restarting\n", evt_type);
752
753 iommu_feature_disable(iommu, cntrl_log);
754 iommu_feature_disable(iommu, cntrl_intr);
755
756 writel(status_overflow_mask, iommu->mmio_base + MMIO_STATUS_OFFSET);
757
758 iommu_feature_enable(iommu, cntrl_intr);
759 iommu_feature_enable(iommu, cntrl_log);
760 }
761
762 /*
763 * This function restarts event logging in case the IOMMU experienced
764 * an event log buffer overflow.
765 */
766 void amd_iommu_restart_event_logging(struct amd_iommu *iommu)
767 {
768 amd_iommu_restart_log(iommu, "Event", CONTROL_EVT_INT_EN,
769 CONTROL_EVT_LOG_EN, MMIO_STATUS_EVT_RUN_MASK,
770 MMIO_STATUS_EVT_OVERFLOW_MASK);
771 }
772
773 /*
774 * This function restarts GA logging in case the IOMMU experienced
775 * a GA log overflow.
776 */
777 void amd_iommu_restart_ga_log(struct amd_iommu *iommu)
778 {
779 amd_iommu_restart_log(iommu, "GA", CONTROL_GAINT_EN,
780 CONTROL_GALOG_EN, MMIO_STATUS_GALOG_RUN_MASK,
781 MMIO_STATUS_GALOG_OVERFLOW_MASK);
782 }
783
784 /*
785 * This function resets the command buffer if the IOMMU stopped fetching
786 * commands from it.
787 */
788 static void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
789 {
790 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
791
792 writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
793 writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
794 iommu->cmd_buf_head = 0;
795 iommu->cmd_buf_tail = 0;
796
797 iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
798 }
799
800 /*
801 * This function writes the command buffer address to the hardware and
802 * enables it.
803 */
804 static void iommu_enable_command_buffer(struct amd_iommu *iommu)
805 {
806 u64 entry;
807
808 BUG_ON(iommu->cmd_buf == NULL);
809
810 entry = iommu_virt_to_phys(iommu->cmd_buf);
811 entry |= MMIO_CMD_SIZE_512;
812
813 memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
814 &entry, sizeof(entry));
815
816 amd_iommu_reset_cmd_buffer(iommu);
817 }
818
819 /*
820 * This function disables the command buffer
821 */
822 static void iommu_disable_command_buffer(struct amd_iommu *iommu)
823 {
824 iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
825 }
826
827 static void __init free_command_buffer(struct amd_iommu *iommu)
828 {
829 iommu_free_pages(iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
830 }
831
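/*
 * Allocate a buffer that the IOMMU hardware may write to. When the SNP feature
 * is present the mapping is additionally split into 4K pages via set_memory_4k().
 */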
832 void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp,
833 size_t size)
834 {
835 int order = get_order(size);
836 void *buf = iommu_alloc_pages(gfp, order);
837
838 if (buf &&
839 check_feature(FEATURE_SNP) &&
840 set_memory_4k((unsigned long)buf, (1 << order))) {
841 iommu_free_pages(buf, order);
842 buf = NULL;
843 }
844
845 return buf;
846 }
847
848 /* allocates the memory where the IOMMU will log its events to */
849 static int __init alloc_event_buffer(struct amd_iommu *iommu)
850 {
851 iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL,
852 EVT_BUFFER_SIZE);
853
854 return iommu->evt_buf ? 0 : -ENOMEM;
855 }
856
857 static void iommu_enable_event_buffer(struct amd_iommu *iommu)
858 {
859 u64 entry;
860
861 BUG_ON(iommu->evt_buf == NULL);
862
863 entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
864
865 memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
866 &entry, sizeof(entry));
867
868 /* set head and tail to zero manually */
869 writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
870 writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
871
872 iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
873 }
874
875 /*
876 * This function disables the event log buffer
877 */
878 static void iommu_disable_event_buffer(struct amd_iommu *iommu)
879 {
880 iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
881 }
882
883 static void __init free_event_buffer(struct amd_iommu *iommu)
884 {
885 iommu_free_pages(iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
886 }
887
888 static void free_ga_log(struct amd_iommu *iommu)
889 {
890 #ifdef CONFIG_IRQ_REMAP
891 iommu_free_pages(iommu->ga_log, get_order(GA_LOG_SIZE));
892 iommu_free_pages(iommu->ga_log_tail, get_order(8));
893 #endif
894 }
895
896 #ifdef CONFIG_IRQ_REMAP
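/*
 * Program the GA log base and tail registers, enable GA logging and poll the
 * status register (up to MMIO_STATUS_TIMEOUT iterations) until the log is
 * reported as running.
 */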
897 static int iommu_ga_log_enable(struct amd_iommu *iommu)
898 {
899 u32 status, i;
900 u64 entry;
901
902 if (!iommu->ga_log)
903 return -EINVAL;
904
905 entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
906 memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
907 &entry, sizeof(entry));
908 entry = (iommu_virt_to_phys(iommu->ga_log_tail) &
909 (BIT_ULL(52)-1)) & ~7ULL;
910 memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET,
911 &entry, sizeof(entry));
912 writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
913 writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
914
915
916 iommu_feature_enable(iommu, CONTROL_GAINT_EN);
917 iommu_feature_enable(iommu, CONTROL_GALOG_EN);
918
919 for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) {
920 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
921 if (status & (MMIO_STATUS_GALOG_RUN_MASK))
922 break;
923 udelay(10);
924 }
925
926 if (WARN_ON(i >= MMIO_STATUS_TIMEOUT))
927 return -EINVAL;
928
929 return 0;
930 }
931
932 static int iommu_init_ga_log(struct amd_iommu *iommu)
933 {
934 if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
935 return 0;
936
937 iommu->ga_log = iommu_alloc_pages(GFP_KERNEL, get_order(GA_LOG_SIZE));
938 if (!iommu->ga_log)
939 goto err_out;
940
941 iommu->ga_log_tail = iommu_alloc_pages(GFP_KERNEL, get_order(8));
942 if (!iommu->ga_log_tail)
943 goto err_out;
944
945 return 0;
946 err_out:
947 free_ga_log(iommu);
948 return -EINVAL;
949 }
950 #endif /* CONFIG_IRQ_REMAP */
951
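/* Allocate the page used as the completion-wait write-back semaphore. */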
952 static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
953 {
954 iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL, 1);
955
956 return iommu->cmd_sem ? 0 : -ENOMEM;
957 }
958
959 static void __init free_cwwb_sem(struct amd_iommu *iommu)
960 {
961 if (iommu->cmd_sem)
962 iommu_free_page((void *)iommu->cmd_sem);
963 }
964
965 static void iommu_enable_xt(struct amd_iommu *iommu)
966 {
967 #ifdef CONFIG_IRQ_REMAP
968 /*
969 * XT mode (32-bit APIC destination ID) requires
970 * GA mode (128-bit IRTE support) as a prerequisite.
971 */
972 if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir) &&
973 amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
974 iommu_feature_enable(iommu, CONTROL_XT_EN);
975 #endif /* CONFIG_IRQ_REMAP */
976 }
977
978 static void iommu_enable_gt(struct amd_iommu *iommu)
979 {
980 if (!check_feature(FEATURE_GT))
981 return;
982
983 iommu_feature_enable(iommu, CONTROL_GT_EN);
984 }
985
986 /* sets a specific bit in the device table entry. */
987 static void set_dte_bit(struct dev_table_entry *dte, u8 bit)
988 {
989 int i = (bit >> 6) & 0x03;
990 int _bit = bit & 0x3f;
991
992 dte->data[i] |= (1UL << _bit);
993 }
994
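/*
 * Copy the device table set up by the previous (crashed) kernel so that devices
 * with in-flight DMA keep a valid translation during kdump. Domain IDs are
 * reserved, GCR3 pointers are masked out and stale IRQ remapping state is
 * rejected while copying.
 */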
995 static bool __copy_device_table(struct amd_iommu *iommu)
996 {
997 u64 int_ctl, int_tab_len, entry = 0;
998 struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
999 struct dev_table_entry *old_devtb = NULL;
1000 u32 lo, hi, devid, old_devtb_size;
1001 phys_addr_t old_devtb_phys;
1002 u16 dom_id, dte_v, irq_v;
1003 u64 tmp;
1004
1005 /* Each IOMMU uses a separate device table with the same size */
1006 lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
1007 hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
1008 entry = (((u64) hi) << 32) + lo;
1009
1010 old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
1011 if (old_devtb_size != pci_seg->dev_table_size) {
1012 pr_err("The device table size of IOMMU:%d is not expected!\n",
1013 iommu->index);
1014 return false;
1015 }
1016
1017 /*
1018 * When SME is enabled in the first kernel, the entry includes the
1019 * memory encryption mask (sme_me_mask); we must remove it to obtain
1020 * the true physical address in the kdump kernel.
1021 */
1022 old_devtb_phys = __sme_clr(entry) & PAGE_MASK;
1023
1024 if (old_devtb_phys >= 0x100000000ULL) {
1025 pr_err("The address of old device table is above 4G, not trustworthy!\n");
1026 return false;
1027 }
1028 old_devtb = (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) && is_kdump_kernel())
1029 ? (__force void *)ioremap_encrypted(old_devtb_phys,
1030 pci_seg->dev_table_size)
1031 : memremap(old_devtb_phys, pci_seg->dev_table_size, MEMREMAP_WB);
1032
1033 if (!old_devtb)
1034 return false;
1035
1036 pci_seg->old_dev_tbl_cpy = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32,
1037 get_order(pci_seg->dev_table_size));
1038 if (pci_seg->old_dev_tbl_cpy == NULL) {
1039 pr_err("Failed to allocate memory for copying old device table!\n");
1040 memunmap(old_devtb);
1041 return false;
1042 }
1043
1044 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
1045 pci_seg->old_dev_tbl_cpy[devid] = old_devtb[devid];
1046 dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
1047 dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
1048
1049 if (dte_v && dom_id) {
1050 pci_seg->old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0];
1051 pci_seg->old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1];
1052 /* Reserve the Domain IDs used by previous kernel */
1053 if (ida_alloc_range(&pdom_ids, dom_id, dom_id, GFP_ATOMIC) != dom_id) {
1054 pr_err("Failed to reserve domain ID 0x%x\n", dom_id);
1055 memunmap(old_devtb);
1056 return false;
1057 }
1058 /* If gcr3 table existed, mask it out */
1059 if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
1060 tmp = (DTE_GCR3_30_15 | DTE_GCR3_51_31);
1061 pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp;
1062 tmp = (DTE_GCR3_14_12 | DTE_FLAG_GV);
1063 pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp;
1064 }
1065 }
1066
1067 irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
1068 int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK;
1069 int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK;
1070 if (irq_v && (int_ctl || int_tab_len)) {
1071 if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
1072 (int_tab_len != DTE_INTTABLEN)) {
1073 pr_err("Wrong old irq remapping flag: %#x\n", devid);
1074 memunmap(old_devtb);
1075 return false;
1076 }
1077
1078 pci_seg->old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2];
1079 }
1080 }
1081 memunmap(old_devtb);
1082
1083 return true;
1084 }
1085
1086 static bool copy_device_table(void)
1087 {
1088 struct amd_iommu *iommu;
1089 struct amd_iommu_pci_seg *pci_seg;
1090
1091 if (!amd_iommu_pre_enabled)
1092 return false;
1093
1094 pr_warn("Translation is already enabled - trying to copy translation structures\n");
1095
1096 /*
1097 * All IOMMUs within a PCI segment share a common device table.
1098 * Hence copy the device table only once per PCI segment.
1099 */
1100 for_each_pci_segment(pci_seg) {
1101 for_each_iommu(iommu) {
1102 if (pci_seg->id != iommu->pci_seg->id)
1103 continue;
1104 if (!__copy_device_table(iommu))
1105 return false;
1106 break;
1107 }
1108 }
1109
1110 return true;
1111 }
1112
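/*
 * Return the DTE flags recorded from the IVHD for @devid, preferring the
 * narrowest devid range that contains it.
 */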
1113 struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid)
1114 {
1115 struct ivhd_dte_flags *e;
1116 unsigned int best_len = UINT_MAX;
1117 struct dev_table_entry *dte = NULL;
1118
1119 for_each_ivhd_dte_flags(e) {
1120 /*
1121 * Need to go through the whole list to find the smallest range
1122 * that contains the devid.
1123 */
1124 if ((e->segid == segid) &&
1125 (e->devid_first <= devid) && (devid <= e->devid_last)) {
1126 unsigned int len = e->devid_last - e->devid_first;
1127
1128 if (len < best_len) {
1129 dte = &(e->dte);
1130 best_len = len;
1131 }
1132 }
1133 }
1134 return dte;
1135 }
1136
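/* Check whether DTE flags for this exact segment/devid range were already recorded. */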
1137 static bool search_ivhd_dte_flags(u16 segid, u16 first, u16 last)
1138 {
1139 struct ivhd_dte_flags *e;
1140
1141 for_each_ivhd_dte_flags(e) {
1142 if ((e->segid == segid) &&
1143 (e->devid_first == first) &&
1144 (e->devid_last == last))
1145 return true;
1146 }
1147 return false;
1148 }
1149
1150 /*
1151 * This function takes the device specific flags read from the ACPI
1152 * table and sets up the device table entry with that information
1153 */
1154 static void __init
1155 set_dev_entry_from_acpi_range(struct amd_iommu *iommu, u16 first, u16 last,
1156 u32 flags, u32 ext_flags)
1157 {
1158 int i;
1159 struct dev_table_entry dte = {};
1160
1161 /* Parse IVHD DTE setting flags and store information */
1162 if (flags) {
1163 struct ivhd_dte_flags *d;
1164
1165 if (search_ivhd_dte_flags(iommu->pci_seg->id, first, last))
1166 return;
1167
1168 d = kzalloc(sizeof(struct ivhd_dte_flags), GFP_KERNEL);
1169 if (!d)
1170 return;
1171
1172 pr_debug("%s: devid range %#x:%#x\n", __func__, first, last);
1173
1174 if (flags & ACPI_DEVFLAG_INITPASS)
1175 set_dte_bit(&dte, DEV_ENTRY_INIT_PASS);
1176 if (flags & ACPI_DEVFLAG_EXTINT)
1177 set_dte_bit(&dte, DEV_ENTRY_EINT_PASS);
1178 if (flags & ACPI_DEVFLAG_NMI)
1179 set_dte_bit(&dte, DEV_ENTRY_NMI_PASS);
1180 if (flags & ACPI_DEVFLAG_SYSMGT1)
1181 set_dte_bit(&dte, DEV_ENTRY_SYSMGT1);
1182 if (flags & ACPI_DEVFLAG_SYSMGT2)
1183 set_dte_bit(&dte, DEV_ENTRY_SYSMGT2);
1184 if (flags & ACPI_DEVFLAG_LINT0)
1185 set_dte_bit(&dte, DEV_ENTRY_LINT0_PASS);
1186 if (flags & ACPI_DEVFLAG_LINT1)
1187 set_dte_bit(&dte, DEV_ENTRY_LINT1_PASS);
1188
1189 /* Apply erratum 63, which needs info in initial_dte */
1190 if (FIELD_GET(DTE_DATA1_SYSMGT_MASK, dte.data[1]) == 0x1)
1191 dte.data[0] |= DTE_FLAG_IW;
1192
1193 memcpy(&d->dte, &dte, sizeof(dte));
1194 d->segid = iommu->pci_seg->id;
1195 d->devid_first = first;
1196 d->devid_last = last;
1197 list_add_tail(&d->list, &amd_ivhd_dev_flags_list);
1198 }
1199
1200 for (i = first; i <= last; i++) {
1201 if (flags) {
1202 struct dev_table_entry *dev_table = get_dev_table(iommu);
1203
1204 memcpy(&dev_table[i], &dte, sizeof(dte));
1205 }
1206 amd_iommu_set_rlookup_table(iommu, i);
1207 }
1208 }
1209
1210 static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
1211 u16 devid, u32 flags, u32 ext_flags)
1212 {
1213 set_dev_entry_from_acpi_range(iommu, devid, devid, flags, ext_flags);
1214 }
1215
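/*
 * Record an IOAPIC/HPET id -> devid mapping. If a command-line override for the
 * same id already exists, the override wins and *devid is updated instead.
 */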
1216 int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line)
1217 {
1218 struct devid_map *entry;
1219 struct list_head *list;
1220
1221 if (type == IVHD_SPECIAL_IOAPIC)
1222 list = &ioapic_map;
1223 else if (type == IVHD_SPECIAL_HPET)
1224 list = &hpet_map;
1225 else
1226 return -EINVAL;
1227
1228 list_for_each_entry(entry, list, list) {
1229 if (!(entry->id == id && entry->cmd_line))
1230 continue;
1231
1232 pr_info("Command-line override present for %s id %d - ignoring\n",
1233 type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id);
1234
1235 *devid = entry->devid;
1236
1237 return 0;
1238 }
1239
1240 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1241 if (!entry)
1242 return -ENOMEM;
1243
1244 entry->id = id;
1245 entry->devid = *devid;
1246 entry->cmd_line = cmd_line;
1247
1248 list_add_tail(&entry->list, list);
1249
1250 return 0;
1251 }
1252
1253 static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid,
1254 bool cmd_line)
1255 {
1256 struct acpihid_map_entry *entry;
1257 struct list_head *list = &acpihid_map;
1258
1259 list_for_each_entry(entry, list, list) {
1260 if (strcmp(entry->hid, hid) ||
1261 (*uid && *entry->uid && strcmp(entry->uid, uid)) ||
1262 !entry->cmd_line)
1263 continue;
1264
1265 pr_info("Command-line override for hid:%s uid:%s\n",
1266 hid, uid);
1267 *devid = entry->devid;
1268 return 0;
1269 }
1270
1271 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1272 if (!entry)
1273 return -ENOMEM;
1274
1275 memcpy(entry->uid, uid, strlen(uid));
1276 memcpy(entry->hid, hid, strlen(hid));
1277 entry->devid = *devid;
1278 entry->cmd_line = cmd_line;
1279 entry->root_devid = (entry->devid & (~0x7));
1280
1281 pr_info("%s, add hid:%s, uid:%s, rdevid:%#x\n",
1282 entry->cmd_line ? "cmd" : "ivrs",
1283 entry->hid, entry->uid, entry->root_devid);
1284
1285 list_add_tail(&entry->list, list);
1286 return 0;
1287 }
1288
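/* Register the IOAPIC/HPET/ACPI-HID mappings passed on the kernel command line. */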
1289 static int __init add_early_maps(void)
1290 {
1291 int i, ret;
1292
1293 for (i = 0; i < early_ioapic_map_size; ++i) {
1294 ret = add_special_device(IVHD_SPECIAL_IOAPIC,
1295 early_ioapic_map[i].id,
1296 &early_ioapic_map[i].devid,
1297 early_ioapic_map[i].cmd_line);
1298 if (ret)
1299 return ret;
1300 }
1301
1302 for (i = 0; i < early_hpet_map_size; ++i) {
1303 ret = add_special_device(IVHD_SPECIAL_HPET,
1304 early_hpet_map[i].id,
1305 &early_hpet_map[i].devid,
1306 early_hpet_map[i].cmd_line);
1307 if (ret)
1308 return ret;
1309 }
1310
1311 for (i = 0; i < early_acpihid_map_size; ++i) {
1312 ret = add_acpi_hid_device(early_acpihid_map[i].hid,
1313 early_acpihid_map[i].uid,
1314 &early_acpihid_map[i].devid,
1315 early_acpihid_map[i].cmd_line);
1316 if (ret)
1317 return ret;
1318 }
1319
1320 return 0;
1321 }
1322
1323 /*
1324 * Takes a pointer to an AMD IOMMU entry in the ACPI table and
1325 * initializes the hardware and our data structures with it.
1326 */
1327 static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
1328 struct ivhd_header *h)
1329 {
1330 u8 *p = (u8 *)h;
1331 u8 *end = p, flags = 0;
1332 u16 devid = 0, devid_start = 0, devid_to = 0, seg_id;
1333 u32 dev_i, ext_flags = 0;
1334 bool alias = false;
1335 struct ivhd_entry *e;
1336 struct amd_iommu_pci_seg *pci_seg = iommu->pci_seg;
1337 u32 ivhd_size;
1338 int ret;
1339
1340
1341 ret = add_early_maps();
1342 if (ret)
1343 return ret;
1344
1345 amd_iommu_apply_ivrs_quirks();
1346
1347 /*
1348 * First save the recommended feature enable bits from ACPI
1349 */
1350 iommu->acpi_flags = h->flags;
1351
1352 /*
1353 * Done. Now parse the device entries
1354 */
1355 ivhd_size = get_ivhd_header_size(h);
1356 if (!ivhd_size) {
1357 pr_err("Unsupported IVHD type %#x\n", h->type);
1358 return -EINVAL;
1359 }
1360
1361 p += ivhd_size;
1362
1363 end += h->length;
1364
1365
1366 while (p < end) {
1367 e = (struct ivhd_entry *)p;
1368 seg_id = pci_seg->id;
1369
1370 switch (e->type) {
1371 case IVHD_DEV_ALL:
1372
1373 DUMP_printk(" DEV_ALL\t\t\tsetting: %#02x\n", e->flags);
1374 set_dev_entry_from_acpi_range(iommu, 0, pci_seg->last_bdf, e->flags, 0);
1375 break;
1376 case IVHD_DEV_SELECT:
1377
1378 DUMP_printk(" DEV_SELECT\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x\n",
1379 seg_id, PCI_BUS_NUM(e->devid),
1380 PCI_SLOT(e->devid),
1381 PCI_FUNC(e->devid),
1382 e->flags);
1383
1384 devid = e->devid;
1385 set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1386 break;
1387 case IVHD_DEV_SELECT_RANGE_START:
1388
1389 DUMP_printk(" DEV_SELECT_RANGE_START\tdevid: %04x:%02x:%02x.%x flags: %#02x\n",
1390 seg_id, PCI_BUS_NUM(e->devid),
1391 PCI_SLOT(e->devid),
1392 PCI_FUNC(e->devid),
1393 e->flags);
1394
1395 devid_start = e->devid;
1396 flags = e->flags;
1397 ext_flags = 0;
1398 alias = false;
1399 break;
1400 case IVHD_DEV_ALIAS:
1401
1402 DUMP_printk(" DEV_ALIAS\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %02x:%02x.%x\n",
1403 seg_id, PCI_BUS_NUM(e->devid),
1404 PCI_SLOT(e->devid),
1405 PCI_FUNC(e->devid),
1406 e->flags,
1407 PCI_BUS_NUM(e->ext >> 8),
1408 PCI_SLOT(e->ext >> 8),
1409 PCI_FUNC(e->ext >> 8));
1410
1411 devid = e->devid;
1412 devid_to = e->ext >> 8;
1413 set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1414 set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
1415 pci_seg->alias_table[devid] = devid_to;
1416 break;
1417 case IVHD_DEV_ALIAS_RANGE:
1418
1419 DUMP_printk(" DEV_ALIAS_RANGE\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %04x:%02x:%02x.%x\n",
1420 seg_id, PCI_BUS_NUM(e->devid),
1421 PCI_SLOT(e->devid),
1422 PCI_FUNC(e->devid),
1423 e->flags,
1424 seg_id, PCI_BUS_NUM(e->ext >> 8),
1425 PCI_SLOT(e->ext >> 8),
1426 PCI_FUNC(e->ext >> 8));
1427
1428 devid_start = e->devid;
1429 flags = e->flags;
1430 devid_to = e->ext >> 8;
1431 ext_flags = 0;
1432 alias = true;
1433 break;
1434 case IVHD_DEV_EXT_SELECT:
1435
1436 DUMP_printk(" DEV_EXT_SELECT\t\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n",
1437 seg_id, PCI_BUS_NUM(e->devid),
1438 PCI_SLOT(e->devid),
1439 PCI_FUNC(e->devid),
1440 e->flags, e->ext);
1441
1442 devid = e->devid;
1443 set_dev_entry_from_acpi(iommu, devid, e->flags,
1444 e->ext);
1445 break;
1446 case IVHD_DEV_EXT_SELECT_RANGE:
1447
1448 DUMP_printk(" DEV_EXT_SELECT_RANGE\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n",
1449 seg_id, PCI_BUS_NUM(e->devid),
1450 PCI_SLOT(e->devid),
1451 PCI_FUNC(e->devid),
1452 e->flags, e->ext);
1453
1454 devid_start = e->devid;
1455 flags = e->flags;
1456 ext_flags = e->ext;
1457 alias = false;
1458 break;
1459 case IVHD_DEV_RANGE_END:
1460
1461 DUMP_printk(" DEV_RANGE_END\t\tdevid: %04x:%02x:%02x.%x\n",
1462 seg_id, PCI_BUS_NUM(e->devid),
1463 PCI_SLOT(e->devid),
1464 PCI_FUNC(e->devid));
1465
1466 devid = e->devid;
1467 for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
1468 if (alias)
1469 pci_seg->alias_table[dev_i] = devid_to;
1470 }
1471 set_dev_entry_from_acpi_range(iommu, devid_start, devid, flags, ext_flags);
1472 set_dev_entry_from_acpi(iommu, devid_to, flags, ext_flags);
1473 break;
1474 case IVHD_DEV_SPECIAL: {
1475 u8 handle, type;
1476 const char *var;
1477 u32 devid;
1478 int ret;
1479
1480 handle = e->ext & 0xff;
1481 devid = PCI_SEG_DEVID_TO_SBDF(seg_id, (e->ext >> 8));
1482 type = (e->ext >> 24) & 0xff;
1483
1484 if (type == IVHD_SPECIAL_IOAPIC)
1485 var = "IOAPIC";
1486 else if (type == IVHD_SPECIAL_HPET)
1487 var = "HPET";
1488 else
1489 var = "UNKNOWN";
1490
1491 DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n",
1492 var, (int)handle,
1493 seg_id, PCI_BUS_NUM(devid),
1494 PCI_SLOT(devid),
1495 PCI_FUNC(devid),
1496 e->flags);
1497
1498 ret = add_special_device(type, handle, &devid, false);
1499 if (ret)
1500 return ret;
1501
1502 /*
1503 * add_special_device might update the devid in case a
1504 * command-line override is present. So call
1505 * set_dev_entry_from_acpi after add_special_device.
1506 */
1507 set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1508
1509 break;
1510 }
1511 case IVHD_DEV_ACPI_HID: {
1512 u32 devid;
1513 u8 hid[ACPIHID_HID_LEN];
1514 u8 uid[ACPIHID_UID_LEN];
1515 int ret;
1516
1517 if (h->type != 0x40) {
1518 pr_err(FW_BUG "Invalid IVHD device type %#x\n",
1519 e->type);
1520 break;
1521 }
1522
1523 BUILD_BUG_ON(sizeof(e->ext_hid) != ACPIHID_HID_LEN - 1);
1524 memcpy(hid, &e->ext_hid, ACPIHID_HID_LEN - 1);
1525 hid[ACPIHID_HID_LEN - 1] = '\0';
1526
1527 if (!(*hid)) {
1528 pr_err(FW_BUG "Invalid HID.\n");
1529 break;
1530 }
1531
1532 uid[0] = '\0';
1533 switch (e->uidf) {
1534 case UID_NOT_PRESENT:
1535
1536 if (e->uidl != 0)
1537 pr_warn(FW_BUG "Invalid UID length.\n");
1538
1539 break;
1540 case UID_IS_INTEGER:
1541
1542 sprintf(uid, "%d", e->uid);
1543
1544 break;
1545 case UID_IS_CHARACTER:
1546
1547 memcpy(uid, &e->uid, e->uidl);
1548 uid[e->uidl] = '\0';
1549
1550 break;
1551 default:
1552 break;
1553 }
1554
1555 devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid);
1556 DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n",
1557 hid, uid, seg_id,
1558 PCI_BUS_NUM(devid),
1559 PCI_SLOT(devid),
1560 PCI_FUNC(devid),
1561 e->flags);
1562
1563 flags = e->flags;
1564
1565 ret = add_acpi_hid_device(hid, uid, &devid, false);
1566 if (ret)
1567 return ret;
1568
1569 /*
1570 * add_acpi_hid_device might update the devid in case a
1571 * command-line override is present. So call
1572 * set_dev_entry_from_acpi after add_acpi_hid_device.
1573 */
1574 set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1575
1576 break;
1577 }
1578 default:
1579 break;
1580 }
1581
1582 p += ivhd_entry_length(p);
1583 }
1584
1585 return 0;
1586 }
1587
1588 /* Allocate PCI segment data structure */
1589 static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id,
1590 struct acpi_table_header *ivrs_base)
1591 {
1592 struct amd_iommu_pci_seg *pci_seg;
1593 int last_bdf;
1594
1595 /*
1596 * First parse ACPI tables to find the largest Bus/Dev/Func we need to
1597 * handle in this PCI segment. Based on this information, the shared data
1598 * structures for the PCI segments in the system will be allocated.
1599 */
1600 last_bdf = find_last_devid_acpi(ivrs_base, id);
1601 if (last_bdf < 0)
1602 return NULL;
1603
1604 pci_seg = kzalloc(sizeof(struct amd_iommu_pci_seg), GFP_KERNEL);
1605 if (pci_seg == NULL)
1606 return NULL;
1607
1608 pci_seg->last_bdf = last_bdf;
1609 DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf);
1610 pci_seg->dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE, last_bdf);
1611 pci_seg->alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE, last_bdf);
1612 pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE, last_bdf);
1613
1614 pci_seg->id = id;
1615 init_llist_head(&pci_seg->dev_data_list);
1616 INIT_LIST_HEAD(&pci_seg->unity_map);
1617 list_add_tail(&pci_seg->list, &amd_iommu_pci_seg_list);
1618
1619 if (alloc_dev_table(pci_seg))
1620 return NULL;
1621 if (alloc_alias_table(pci_seg))
1622 return NULL;
1623 if (alloc_rlookup_table(pci_seg))
1624 return NULL;
1625
1626 return pci_seg;
1627 }
1628
1629 static struct amd_iommu_pci_seg *__init get_pci_segment(u16 id,
1630 struct acpi_table_header *ivrs_base)
1631 {
1632 struct amd_iommu_pci_seg *pci_seg;
1633
1634 for_each_pci_segment(pci_seg) {
1635 if (pci_seg->id == id)
1636 return pci_seg;
1637 }
1638
1639 return alloc_pci_segment(id, ivrs_base);
1640 }
1641
1642 static void __init free_pci_segments(void)
1643 {
1644 struct amd_iommu_pci_seg *pci_seg, *next;
1645
1646 for_each_pci_segment_safe(pci_seg, next) {
1647 list_del(&pci_seg->list);
1648 free_irq_lookup_table(pci_seg);
1649 free_rlookup_table(pci_seg);
1650 free_alias_table(pci_seg);
1651 free_dev_table(pci_seg);
1652 kfree(pci_seg);
1653 }
1654 }
1655
1656 static void __init free_sysfs(struct amd_iommu *iommu)
1657 {
1658 if (iommu->iommu.dev) {
1659 iommu_device_unregister(&iommu->iommu);
1660 iommu_device_sysfs_remove(&iommu->iommu);
1661 }
1662 }
1663
1664 static void __init free_iommu_one(struct amd_iommu *iommu)
1665 {
1666 free_sysfs(iommu);
1667 free_cwwb_sem(iommu);
1668 free_command_buffer(iommu);
1669 free_event_buffer(iommu);
1670 amd_iommu_free_ppr_log(iommu);
1671 free_ga_log(iommu);
1672 iommu_unmap_mmio_space(iommu);
1673 amd_iommu_iopf_uninit(iommu);
1674 }
1675
1676 static void __init free_iommu_all(void)
1677 {
1678 struct amd_iommu *iommu, *next;
1679
1680 for_each_iommu_safe(iommu, next) {
1681 list_del(&iommu->list);
1682 free_iommu_one(iommu);
1683 kfree(iommu);
1684 }
1685 }
1686
1687 /*
1688 * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations)
1689 * Workaround:
1690 * BIOS should disable L2B miscellaneous clock gating by setting
1691 * L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b
1692 */
1693 static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu)
1694 {
1695 u32 value;
1696
1697 if ((boot_cpu_data.x86 != 0x15) ||
1698 (boot_cpu_data.x86_model < 0x10) ||
1699 (boot_cpu_data.x86_model > 0x1f))
1700 return;
1701
1702 pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1703 pci_read_config_dword(iommu->dev, 0xf4, &value);
1704
1705 if (value & BIT(2))
1706 return;
1707
1708 /* Select NB indirect register 0x90 and enable writing */
1709 pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8));
1710
1711 pci_write_config_dword(iommu->dev, 0xf4, value | 0x4);
1712 pci_info(iommu->dev, "Applying erratum 746 workaround\n");
1713
1714 /* Clear the enable writing bit */
1715 pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1716 }
1717
1718 /*
1719 * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission)
1720 * Workaround:
1721 * BIOS should enable ATS write permission check by setting
1722 * L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b
1723 */
1724 static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu)
1725 {
1726 u32 value;
1727
1728 if ((boot_cpu_data.x86 != 0x15) ||
1729 (boot_cpu_data.x86_model < 0x30) ||
1730 (boot_cpu_data.x86_model > 0x3f))
1731 return;
1732
1733 /* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */
1734 value = iommu_read_l2(iommu, 0x47);
1735
1736 if (value & BIT(0))
1737 return;
1738
1739 /* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */
1740 iommu_write_l2(iommu, 0x47, value | BIT(0));
1741
1742 pci_info(iommu->dev, "Applying ATS write check workaround\n");
1743 }
1744
1745 /*
1746 * This function glues the initialization of one IOMMU together, maps
1747 * its MMIO space and parses its device entries from the ACPI table.
1748 * It does NOT enable the IOMMU. This is done afterwards.
1749 */
1750 static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
1751 struct acpi_table_header *ivrs_base)
1752 {
1753 struct amd_iommu_pci_seg *pci_seg;
1754
1755 pci_seg = get_pci_segment(h->pci_seg, ivrs_base);
1756 if (pci_seg == NULL)
1757 return -ENOMEM;
1758 iommu->pci_seg = pci_seg;
1759
1760 raw_spin_lock_init(&iommu->lock);
1761 atomic64_set(&iommu->cmd_sem_val, 0);
1762
1763 /* Add IOMMU to internal data structures */
1764 list_add_tail(&iommu->list, &amd_iommu_list);
1765 iommu->index = amd_iommus_present++;
1766
1767 if (unlikely(iommu->index >= MAX_IOMMUS)) {
1768 WARN(1, "System has more IOMMUs than supported by this driver\n");
1769 return -ENOSYS;
1770 }
1771
1772 /*
1773 * Copy data from ACPI table entry to the iommu struct
1774 */
1775 iommu->devid = h->devid;
1776 iommu->cap_ptr = h->cap_ptr;
1777 iommu->mmio_phys = h->mmio_phys;
1778
1779 switch (h->type) {
1780 case 0x10:
1781 /* Check if IVHD EFR contains proper max banks/counters */
1782 if ((h->efr_attr != 0) &&
1783 ((h->efr_attr & (0xF << 13)) != 0) &&
1784 ((h->efr_attr & (0x3F << 17)) != 0))
1785 iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1786 else
1787 iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1788
1789 /* GAM requires GA mode. */
1790 if ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0)
1791 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1792 break;
1793 case 0x11:
1794 case 0x40:
1795 if (h->efr_reg & (1 << 9))
1796 iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1797 else
1798 iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1799
1800 /* XT and GAM require GA mode. */
1801 if ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0) {
1802 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1803 break;
1804 }
1805
1806 if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT))
1807 amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
1808
1809 early_iommu_features_init(iommu, h);
1810
1811 break;
1812 default:
1813 return -EINVAL;
1814 }
1815
1816 iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys,
1817 iommu->mmio_phys_end);
1818 if (!iommu->mmio_base)
1819 return -ENOMEM;
1820
1821 return init_iommu_from_acpi(iommu, h);
1822 }
1823
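/*
 * Second-stage per-IOMMU setup: allocate the CWWB semaphore and the command and
 * event buffers, record whether translation was left enabled by a previous
 * kernel, and create the IRQ remapping domain when interrupt remapping is used.
 */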
1824 static int __init init_iommu_one_late(struct amd_iommu *iommu)
1825 {
1826 int ret;
1827
1828 if (alloc_cwwb_sem(iommu))
1829 return -ENOMEM;
1830
1831 if (alloc_command_buffer(iommu))
1832 return -ENOMEM;
1833
1834 if (alloc_event_buffer(iommu))
1835 return -ENOMEM;
1836
1837 iommu->int_enabled = false;
1838
1839 init_translation_status(iommu);
1840 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
1841 iommu_disable(iommu);
1842 clear_translation_pre_enabled(iommu);
1843 pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n",
1844 iommu->index);
1845 }
1846 if (amd_iommu_pre_enabled)
1847 amd_iommu_pre_enabled = translation_pre_enabled(iommu);
1848
1849 if (amd_iommu_irq_remap) {
1850 ret = amd_iommu_create_irq_domain(iommu);
1851 if (ret)
1852 return ret;
1853 }
1854
1855 /*
1856 * Make sure IOMMU is not considered to translate itself. The IVRS
1857 * table tells us so, but this is a lie!
1858 */
1859 iommu->pci_seg->rlookup_table[iommu->devid] = NULL;
1860
1861 return 0;
1862 }
1863
1864 /**
1865 * get_highest_supported_ivhd_type - Look up the appropriate IVHD type
1866 * @ivrs: Pointer to the IVRS header
1867 *
1868 * This function searches through all IVHD blocks and returns the highest supported IVHD type
1869 */
1870 static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
1871 {
1872 u8 *base = (u8 *)ivrs;
1873 struct ivhd_header *ivhd = (struct ivhd_header *)
1874 (base + IVRS_HEADER_LENGTH);
1875 u8 last_type = ivhd->type;
1876 u16 devid = ivhd->devid;
1877
1878 while (((u8 *)ivhd - base < ivrs->length) &&
1879 (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
1880 u8 *p = (u8 *) ivhd;
1881
1882 if (ivhd->devid == devid)
1883 last_type = ivhd->type;
1884 ivhd = (struct ivhd_header *)(p + ivhd->length);
1885 }
1886
1887 return last_type;
1888 }
1889
1890 /*
1891 * Iterates over all IOMMU entries in the ACPI table, allocates the
1892 * IOMMU structure and initializes it with init_iommu_one()
1893 */
1894 static int __init init_iommu_all(struct acpi_table_header *table)
1895 {
1896 u8 *p = (u8 *)table, *end = (u8 *)table;
1897 struct ivhd_header *h;
1898 struct amd_iommu *iommu;
1899 int ret;
1900
1901 end += table->length;
1902 p += IVRS_HEADER_LENGTH;
1903
1904 /* Phase 1: Process all IVHD blocks */
1905 while (p < end) {
1906 h = (struct ivhd_header *)p;
1907 if (*p == amd_iommu_target_ivhd_type) {
1908
1909 DUMP_printk("device: %04x:%02x:%02x.%01x cap: %04x "
1910 "flags: %01x info %04x\n",
1911 h->pci_seg, PCI_BUS_NUM(h->devid),
1912 PCI_SLOT(h->devid), PCI_FUNC(h->devid),
1913 h->cap_ptr, h->flags, h->info);
1914 DUMP_printk(" mmio-addr: %016llx\n",
1915 h->mmio_phys);
1916
1917 iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
1918 if (iommu == NULL)
1919 return -ENOMEM;
1920
1921 ret = init_iommu_one(iommu, h, table);
1922 if (ret)
1923 return ret;
1924 }
1925 p += h->length;
1926
1927 }
1928 WARN_ON(p != end);
1929
1930 /* Phase 2: Early feature support check */
1931 get_global_efr();
1932
1933 /* Phase 3: Enabling IOMMU features */
1934 for_each_iommu(iommu) {
1935 ret = init_iommu_one_late(iommu);
1936 if (ret)
1937 return ret;
1938 }
1939
1940 return 0;
1941 }
1942
1943 static void init_iommu_perf_ctr(struct amd_iommu *iommu)
1944 {
1945 u64 val;
1946 struct pci_dev *pdev = iommu->dev;
1947
1948 if (!check_feature(FEATURE_PC))
1949 return;
1950
1951 amd_iommu_pc_present = true;
1952
1953 pci_info(pdev, "IOMMU performance counters supported\n");
1954
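/*
 * The counter configuration register encodes the number of banks in
 * bits 17:12 and the number of counters per bank in bits 10:7, which
 * is what the two field extractions below pull out.
 */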
1955 val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET);
1956 iommu->max_banks = (u8) ((val >> 12) & 0x3f);
1957 iommu->max_counters = (u8) ((val >> 7) & 0xf);
1958
1959 return;
1960 }
1961
1962 static ssize_t amd_iommu_show_cap(struct device *dev,
1963 struct device_attribute *attr,
1964 char *buf)
1965 {
1966 struct amd_iommu *iommu = dev_to_amd_iommu(dev);
1967 return sysfs_emit(buf, "%x\n", iommu->cap);
1968 }
1969 static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL);
1970
1971 static ssize_t amd_iommu_show_features(struct device *dev,
1972 struct device_attribute *attr,
1973 char *buf)
1974 {
1975 return sysfs_emit(buf, "%llx:%llx\n", amd_iommu_efr, amd_iommu_efr2);
1976 }
1977 static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL);
1978
1979 static struct attribute *amd_iommu_attrs[] = {
1980 &dev_attr_cap.attr,
1981 &dev_attr_features.attr,
1982 NULL,
1983 };
1984
1985 static struct attribute_group amd_iommu_group = {
1986 .name = "amd-iommu",
1987 .attrs = amd_iommu_attrs,
1988 };
1989
1990 static const struct attribute_group *amd_iommu_groups[] = {
1991 &amd_iommu_group,
1992 NULL,
1993 };
1994
1995 /*
1996 * Note: IVHD 0x11 and 0x40 also contain an exact copy
1997 * of the IOMMU Extended Feature Register [MMIO Offset 0030h].
1998 * Default to EFR in IVHD since it is available sooner (i.e. before PCI init).
1999 */
2000 static void __init late_iommu_features_init(struct amd_iommu *iommu)
2001 {
2002 u64 features, features2;
2003
2004 if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
2005 return;
2006
2007 /* read extended feature bits */
2008 features = readq(iommu->mmio_base + MMIO_EXT_FEATURES);
2009 features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2);
2010
2011 if (!amd_iommu_efr) {
2012 amd_iommu_efr = features;
2013 amd_iommu_efr2 = features2;
2014 return;
2015 }
2016
2017 /*
2018 * Sanity check and warn if EFR values from
2019 * IVHD and MMIO conflict.
2020 */
2021 if (features != amd_iommu_efr ||
2022 features2 != amd_iommu_efr2) {
2023 pr_warn(FW_WARN
2024 "EFR mismatch. Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n",
2025 features, amd_iommu_efr,
2026 features2, amd_iommu_efr2);
2027 }
2028 }
2029
2030 static int __init iommu_init_pci(struct amd_iommu *iommu)
2031 {
2032 int cap_ptr = iommu->cap_ptr;
2033 int ret;
2034
2035 iommu->dev = pci_get_domain_bus_and_slot(iommu->pci_seg->id,
2036 PCI_BUS_NUM(iommu->devid),
2037 iommu->devid & 0xff);
2038 if (!iommu->dev)
2039 return -ENODEV;
2040
2041 /* Prevent binding other PCI device drivers to IOMMU devices */
2042 iommu->dev->match_driver = false;
2043
2044 /* ACPI _PRT won't have an IRQ for IOMMU */
2045 iommu->dev->irq_managed = 1;
2046
2047 pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
2048 &iommu->cap);
2049
2050 if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
2051 amd_iommu_iotlb_sup = false;
2052
2053 late_iommu_features_init(iommu);
2054
2055 if (check_feature(FEATURE_GT)) {
2056 int glxval;
2057 u64 pasmax;
2058
2059 pasmax = FIELD_GET(FEATURE_PASMAX, amd_iommu_efr);
2060 iommu->iommu.max_pasids = (1 << (pasmax + 1)) - 1;
2061
2062 BUG_ON(iommu->iommu.max_pasids & ~PASID_MASK);
2063
2064 glxval = FIELD_GET(FEATURE_GLX, amd_iommu_efr);
2065
2066 if (amd_iommu_max_glx_val == -1)
2067 amd_iommu_max_glx_val = glxval;
2068 else
2069 amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
2070
2071 iommu_enable_gt(iommu);
2072 }
2073
2074 if (check_feature(FEATURE_PPR) && amd_iommu_alloc_ppr_log(iommu))
2075 return -ENOMEM;
2076
2077 if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) {
2078 pr_info("Using strict mode due to virtualization\n");
2079 iommu_set_dma_strict();
2080 amd_iommu_np_cache = true;
2081 }
2082
2083 init_iommu_perf_ctr(iommu);
2084
2085 if (is_rd890_iommu(iommu->dev)) {
2086 int i, j;
2087
2088 iommu->root_pdev =
2089 pci_get_domain_bus_and_slot(iommu->pci_seg->id,
2090 iommu->dev->bus->number,
2091 PCI_DEVFN(0, 0));
2092
2093 /*
2094 * Some rd890 systems may not be fully reconfigured by the
2095 * BIOS, so it's necessary for us to store this information so
2096 * it can be reprogrammed on resume
2097 */
2098 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
2099 &iommu->stored_addr_lo);
2100 pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
2101 &iommu->stored_addr_hi);
2102
2103 /* Low bit locks writes to configuration space */
2104 iommu->stored_addr_lo &= ~1;
2105
2106 for (i = 0; i < 6; i++)
2107 for (j = 0; j < 0x12; j++)
2108 iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
2109
2110 for (i = 0; i < 0x83; i++)
2111 iommu->stored_l2[i] = iommu_read_l2(iommu, i);
2112 }
2113
2114 amd_iommu_erratum_746_workaround(iommu);
2115 amd_iommu_ats_write_check_workaround(iommu);
2116
2117 ret = iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev,
2118 amd_iommu_groups, "ivhd%d", iommu->index);
2119 if (ret)
2120 return ret;
2121
2122 /*
2123 * Allocate per IOMMU IOPF queue here so that in attach device path,
2124 * PRI capable device can be added to IOPF queue
2125 */
2126 if (amd_iommu_gt_ppr_supported()) {
2127 ret = amd_iommu_iopf_init(iommu);
2128 if (ret)
2129 return ret;
2130 }
2131
2132 iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL);
2133
2134 return pci_enable_device(iommu->dev);
2135 }
2136
2137 static void print_iommu_info(void)
2138 {
2139 int i;
2140 static const char * const feat_str[] = {
2141 "PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
2142 "IA", "GA", "HE", "PC"
2143 };
2144
2145 if (amd_iommu_efr) {
2146 pr_info("Extended features (%#llx, %#llx):", amd_iommu_efr, amd_iommu_efr2);
2147
2148 for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
2149 if (check_feature(1ULL << i))
2150 pr_cont(" %s", feat_str[i]);
2151 }
2152
2153 if (check_feature(FEATURE_GAM_VAPIC))
2154 pr_cont(" GA_vAPIC");
2155
2156 if (check_feature(FEATURE_SNP))
2157 pr_cont(" SNP");
2158
2159 pr_cont("\n");
2160 }
2161
2162 if (irq_remapping_enabled) {
2163 pr_info("Interrupt remapping enabled\n");
2164 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2165 pr_info("X2APIC enabled\n");
2166 }
2167 if (amd_iommu_pgtable == PD_MODE_V2) {
2168 pr_info("V2 page table enabled (Paging mode : %d level)\n",
2169 amd_iommu_gpt_level);
2170 }
2171 }
2172
2173 static int __init amd_iommu_init_pci(void)
2174 {
2175 struct amd_iommu *iommu;
2176 struct amd_iommu_pci_seg *pci_seg;
2177 int ret;
2178
2179 /* Init global identity domain before registering IOMMU */
2180 amd_iommu_init_identity_domain();
2181
2182 for_each_iommu(iommu) {
2183 ret = iommu_init_pci(iommu);
2184 if (ret) {
2185 pr_err("IOMMU%d: Failed to initialize IOMMU Hardware (error=%d)!\n",
2186 iommu->index, ret);
2187 goto out;
2188 }
2189 /* Need to setup range after PCI init */
2190 iommu_set_cwwb_range(iommu);
2191 }
2192
2193 /*
2194 * Order is important here to make sure any unity map requirements are
2195 * fulfilled. The unity mappings are created and written to the device
2196 * table during the iommu_init_pci() call.
2197 *
2198 * After that we call init_device_table_dma() to make sure any
2199 * uninitialized DTE will block DMA, and in the end we flush the caches
2200 * of all IOMMUs to make sure the changes to the device table are
2201 * active.
2202 */
2203 for_each_pci_segment(pci_seg)
2204 init_device_table_dma(pci_seg);
2205
2206 for_each_iommu(iommu)
2207 amd_iommu_flush_all_caches(iommu);
2208
2209 print_iommu_info();
2210
2211 out:
2212 return ret;
2213 }
2214
2215 /****************************************************************************
2216 *
2217 * The following functions initialize the MSI interrupts for all IOMMUs
2218 * in the system. It's a bit challenging because there could be multiple
2219 * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per
2220 * pci_dev.
2221 *
2222 ****************************************************************************/
2223
2224 static int iommu_setup_msi(struct amd_iommu *iommu)
2225 {
2226 int r;
2227
2228 r = pci_enable_msi(iommu->dev);
2229 if (r)
2230 return r;
2231
2232 r = request_threaded_irq(iommu->dev->irq,
2233 amd_iommu_int_handler,
2234 amd_iommu_int_thread,
2235 0, "AMD-Vi",
2236 iommu);
2237
2238 if (r) {
2239 pci_disable_msi(iommu->dev);
2240 return r;
2241 }
2242
2243 return 0;
2244 }
2245
2246 union intcapxt {
2247 u64 capxt;
2248 struct {
2249 u64 reserved_0 : 2,
2250 dest_mode_logical : 1,
2251 reserved_1 : 5,
2252 destid_0_23 : 24,
2253 vector : 8,
2254 reserved_2 : 16,
2255 destid_24_31 : 8;
2256 };
2257 } __attribute__ ((packed));
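/*
 * A value of this shape is written to the per-log XT interrupt control
 * registers (the event, PPR and GA log offsets passed in via irqd->hwirq).
 * The destination APIC ID is split across destid_0_23/destid_24_31, and
 * writing 0 masks the interrupt again (see intcapxt_mask_irq()).
 */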
2258
2259
2260 static struct irq_chip intcapxt_controller;
2261
2262 static int intcapxt_irqdomain_activate(struct irq_domain *domain,
2263 struct irq_data *irqd, bool reserve)
2264 {
2265 return 0;
2266 }
2267
2268 static void intcapxt_irqdomain_deactivate(struct irq_domain *domain,
2269 struct irq_data *irqd)
2270 {
2271 }
2272
2273
2274 static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
2275 unsigned int nr_irqs, void *arg)
2276 {
2277 struct irq_alloc_info *info = arg;
2278 int i, ret;
2279
2280 if (!info || info->type != X86_IRQ_ALLOC_TYPE_AMDVI)
2281 return -EINVAL;
2282
2283 ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
2284 if (ret < 0)
2285 return ret;
2286
2287 for (i = virq; i < virq + nr_irqs; i++) {
2288 struct irq_data *irqd = irq_domain_get_irq_data(domain, i);
2289
2290 irqd->chip = &intcapxt_controller;
2291 irqd->hwirq = info->hwirq;
2292 irqd->chip_data = info->data;
2293 __irq_set_handler(i, handle_edge_irq, 0, "edge");
2294 }
2295
2296 return ret;
2297 }
2298
2299 static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq,
2300 unsigned int nr_irqs)
2301 {
2302 irq_domain_free_irqs_top(domain, virq, nr_irqs);
2303 }
2304
2305
2306 static void intcapxt_unmask_irq(struct irq_data *irqd)
2307 {
2308 struct amd_iommu *iommu = irqd->chip_data;
2309 struct irq_cfg *cfg = irqd_cfg(irqd);
2310 union intcapxt xt;
2311
2312 xt.capxt = 0ULL;
2313 xt.dest_mode_logical = apic->dest_mode_logical;
2314 xt.vector = cfg->vector;
2315 xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0);
2316 xt.destid_24_31 = cfg->dest_apicid >> 24;
2317
2318 writeq(xt.capxt, iommu->mmio_base + irqd->hwirq);
2319 }
2320
2321 static void intcapxt_mask_irq(struct irq_data *irqd)
2322 {
2323 struct amd_iommu *iommu = irqd->chip_data;
2324
2325 writeq(0, iommu->mmio_base + irqd->hwirq);
2326 }
2327
2328
2329 static int intcapxt_set_affinity(struct irq_data *irqd,
2330 const struct cpumask *mask, bool force)
2331 {
2332 struct irq_data *parent = irqd->parent_data;
2333 int ret;
2334
2335 ret = parent->chip->irq_set_affinity(parent, mask, force);
2336 if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
2337 return ret;
2338 return 0;
2339 }
2340
2341 static int intcapxt_set_wake(struct irq_data *irqd, unsigned int on)
2342 {
2343 return on ? -EOPNOTSUPP : 0;
2344 }
2345
2346 static struct irq_chip intcapxt_controller = {
2347 .name = "IOMMU-MSI",
2348 .irq_unmask = intcapxt_unmask_irq,
2349 .irq_mask = intcapxt_mask_irq,
2350 .irq_ack = irq_chip_ack_parent,
2351 .irq_retrigger = irq_chip_retrigger_hierarchy,
2352 .irq_set_affinity = intcapxt_set_affinity,
2353 .irq_set_wake = intcapxt_set_wake,
2354 .flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_MOVE_DEFERRED,
2355 };
2356
2357 static const struct irq_domain_ops intcapxt_domain_ops = {
2358 .alloc = intcapxt_irqdomain_alloc,
2359 .free = intcapxt_irqdomain_free,
2360 .activate = intcapxt_irqdomain_activate,
2361 .deactivate = intcapxt_irqdomain_deactivate,
2362 };
2363
2364
2365 static struct irq_domain *iommu_irqdomain;
2366
2367 static struct irq_domain *iommu_get_irqdomain(void)
2368 {
2369 struct fwnode_handle *fn;
2370
2371 /* No need for locking here (yet) as the init is single-threaded */
2372 if (iommu_irqdomain)
2373 return iommu_irqdomain;
2374
2375 fn = irq_domain_alloc_named_fwnode("AMD-Vi-MSI");
2376 if (!fn)
2377 return NULL;
2378
2379 iommu_irqdomain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0,
2380 fn, &intcapxt_domain_ops,
2381 NULL);
2382 if (!iommu_irqdomain)
2383 irq_domain_free_fwnode(fn);
2384
2385 return iommu_irqdomain;
2386 }
2387
2388 static int __iommu_setup_intcapxt(struct amd_iommu *iommu, const char *devname,
2389 int hwirq, irq_handler_t thread_fn)
2390 {
2391 struct irq_domain *domain;
2392 struct irq_alloc_info info;
2393 int irq, ret;
2394 int node = dev_to_node(&iommu->dev->dev);
2395
2396 domain = iommu_get_irqdomain();
2397 if (!domain)
2398 return -ENXIO;
2399
2400 init_irq_alloc_info(&info, NULL);
2401 info.type = X86_IRQ_ALLOC_TYPE_AMDVI;
2402 info.data = iommu;
2403 info.hwirq = hwirq;
2404
2405 irq = irq_domain_alloc_irqs(domain, 1, node, &info);
2406 if (irq < 0) {
2407 irq_domain_remove(domain);
2408 return irq;
2409 }
2410
2411 ret = request_threaded_irq(irq, amd_iommu_int_handler,
2412 thread_fn, 0, devname, iommu);
2413 if (ret) {
2414 irq_domain_free_irqs(irq, 1);
2415 irq_domain_remove(domain);
2416 return ret;
2417 }
2418
2419 return 0;
2420 }
2421
2422 static int iommu_setup_intcapxt(struct amd_iommu *iommu)
2423 {
2424 int ret;
2425
2426 snprintf(iommu->evt_irq_name, sizeof(iommu->evt_irq_name),
2427 "AMD-Vi%d-Evt", iommu->index);
2428 ret = __iommu_setup_intcapxt(iommu, iommu->evt_irq_name,
2429 MMIO_INTCAPXT_EVT_OFFSET,
2430 amd_iommu_int_thread_evtlog);
2431 if (ret)
2432 return ret;
2433
2434 snprintf(iommu->ppr_irq_name, sizeof(iommu->ppr_irq_name),
2435 "AMD-Vi%d-PPR", iommu->index);
2436 ret = __iommu_setup_intcapxt(iommu, iommu->ppr_irq_name,
2437 MMIO_INTCAPXT_PPR_OFFSET,
2438 amd_iommu_int_thread_pprlog);
2439 if (ret)
2440 return ret;
2441
2442 #ifdef CONFIG_IRQ_REMAP
2443 snprintf(iommu->ga_irq_name, sizeof(iommu->ga_irq_name),
2444 "AMD-Vi%d-GA", iommu->index);
2445 ret = __iommu_setup_intcapxt(iommu, iommu->ga_irq_name,
2446 MMIO_INTCAPXT_GALOG_OFFSET,
2447 amd_iommu_int_thread_galog);
2448 #endif
2449
2450 return ret;
2451 }
2452
2453 static int iommu_init_irq(struct amd_iommu *iommu)
2454 {
2455 int ret;
2456
2457 if (iommu->int_enabled)
2458 goto enable_faults;
2459
2460 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2461 ret = iommu_setup_intcapxt(iommu);
2462 else if (iommu->dev->msi_cap)
2463 ret = iommu_setup_msi(iommu);
2464 else
2465 ret = -ENODEV;
2466
2467 if (ret)
2468 return ret;
2469
2470 iommu->int_enabled = true;
2471 enable_faults:
2472
2473 if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
2474 iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
2475
2476 iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
2477
2478 return 0;
2479 }
2480
2481 /****************************************************************************
2482 *
2483 * The next functions belong to the third pass of parsing the ACPI
2484 * table. In this last pass the memory mapping requirements are
2485 * gathered (like exclusion and unity mapping ranges).
2486 *
2487 ****************************************************************************/
2488
2489 static void __init free_unity_maps(void)
2490 {
2491 struct unity_map_entry *entry, *next;
2492 struct amd_iommu_pci_seg *p, *pci_seg;
2493
2494 for_each_pci_segment_safe(pci_seg, p) {
2495 list_for_each_entry_safe(entry, next, &pci_seg->unity_map, list) {
2496 list_del(&entry->list);
2497 kfree(entry);
2498 }
2499 }
2500 }
2501
2502 /* called for unity map ACPI definition */
2503 static int __init init_unity_map_range(struct ivmd_header *m,
2504 struct acpi_table_header *ivrs_base)
2505 {
2506 struct unity_map_entry *e = NULL;
2507 struct amd_iommu_pci_seg *pci_seg;
2508 char *s;
2509
2510 pci_seg = get_pci_segment(m->pci_seg, ivrs_base);
2511 if (pci_seg == NULL)
2512 return -ENOMEM;
2513
2514 e = kzalloc(sizeof(*e), GFP_KERNEL);
2515 if (e == NULL)
2516 return -ENOMEM;
2517
2518 switch (m->type) {
2519 default:
2520 kfree(e);
2521 return 0;
2522 case ACPI_IVMD_TYPE:
2523 s = "IVMD_TYPEi\t\t\t";
2524 e->devid_start = e->devid_end = m->devid;
2525 break;
2526 case ACPI_IVMD_TYPE_ALL:
2527 s = "IVMD_TYPE_ALL\t\t";
2528 e->devid_start = 0;
2529 e->devid_end = pci_seg->last_bdf;
2530 break;
2531 case ACPI_IVMD_TYPE_RANGE:
2532 s = "IVMD_TYPE_RANGE\t\t";
2533 e->devid_start = m->devid;
2534 e->devid_end = m->aux;
2535 break;
2536 }
2537 e->address_start = PAGE_ALIGN(m->range_start);
2538 e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
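/*
 * Shifting the IVMD flags right by one turns IVMD_FLAG_IR/IVMD_FLAG_IW
 * into the read/write protection bits stored in e->prot (the same
 * conversion is used for the exclusion-range case below).
 */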
2539 e->prot = m->flags >> 1;
2540
2541 /*
2542 * Treat per-device exclusion ranges as r/w unity-mapped regions
2543 * since some buggy BIOSes may otherwise cause the exclusion range
2544 * (the exclusion_start and exclusion_length members) to be
2545 * overwritten. This happens when multiple exclusion ranges (IVMD
2546 * entries) are defined in the ACPI table.
2547 */
2548 if (m->flags & IVMD_FLAG_EXCL_RANGE)
2549 e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1;
2550
2551 DUMP_printk("%s devid_start: %04x:%02x:%02x.%x devid_end: "
2552 "%04x:%02x:%02x.%x range_start: %016llx range_end: %016llx"
2553 " flags: %x\n", s, m->pci_seg,
2554 PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start),
2555 PCI_FUNC(e->devid_start), m->pci_seg,
2556 PCI_BUS_NUM(e->devid_end),
2557 PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
2558 e->address_start, e->address_end, m->flags);
2559
2560 list_add_tail(&e->list, &pci_seg->unity_map);
2561
2562 return 0;
2563 }
2564
2565 /* iterates over all memory definitions we find in the ACPI table */
2566 static int __init init_memory_definitions(struct acpi_table_header *table)
2567 {
2568 u8 *p = (u8 *)table, *end = (u8 *)table;
2569 struct ivmd_header *m;
2570
2571 end += table->length;
2572 p += IVRS_HEADER_LENGTH;
2573
2574 while (p < end) {
2575 m = (struct ivmd_header *)p;
2576 if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE))
2577 init_unity_map_range(m, table);
2578
2579 p += m->length;
2580 }
2581
2582 return 0;
2583 }
2584
2585 /*
2586 * Init the device table to not allow DMA access for devices
2587 */
2588 static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
2589 {
2590 u32 devid;
2591 struct dev_table_entry *dev_table = pci_seg->dev_table;
2592
2593 if (dev_table == NULL)
2594 return;
2595
2596 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
2597 set_dte_bit(&dev_table[devid], DEV_ENTRY_VALID);
2598 if (!amd_iommu_snp_en)
2599 set_dte_bit(&dev_table[devid], DEV_ENTRY_TRANSLATION);
2600 }
2601 }
2602
2603 static void __init uninit_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
2604 {
2605 u32 devid;
2606 struct dev_table_entry *dev_table = pci_seg->dev_table;
2607
2608 if (dev_table == NULL)
2609 return;
2610
2611 for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
2612 dev_table[devid].data[0] = 0ULL;
2613 dev_table[devid].data[1] = 0ULL;
2614 }
2615 }
2616
2617 static void init_device_table(void)
2618 {
2619 struct amd_iommu_pci_seg *pci_seg;
2620 u32 devid;
2621
2622 if (!amd_iommu_irq_remap)
2623 return;
2624
2625 for_each_pci_segment(pci_seg) {
2626 for (devid = 0; devid <= pci_seg->last_bdf; ++devid)
2627 set_dte_bit(&pci_seg->dev_table[devid], DEV_ENTRY_IRQ_TBL_EN);
2628 }
2629 }
2630
2631 static void iommu_init_flags(struct amd_iommu *iommu)
2632 {
2633 iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
2634 iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
2635 iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
2636
2637 iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
2638 iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
2639 iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
2640
2641 iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
2642 iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
2643 iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
2644
2645 iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
2646 iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
2647 iommu_feature_disable(iommu, CONTROL_ISOC_EN);
2648
2649 /*
2650 * make IOMMU memory accesses cache coherent
2651 */
2652 iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
2653
2654 /* Set IOTLB invalidation timeout to 1s */
2655 iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S);
2656
2657 /* Enable Enhanced Peripheral Page Request Handling */
2658 if (check_feature(FEATURE_EPHSUP))
2659 iommu_feature_enable(iommu, CONTROL_EPH_EN);
2660 }
2661
2662 static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
2663 {
2664 int i, j;
2665 u32 ioc_feature_control;
2666 struct pci_dev *pdev = iommu->root_pdev;
2667
2668 /* RD890 BIOSes may not have completely reconfigured the iommu */
2669 if (!is_rd890_iommu(iommu->dev) || !pdev)
2670 return;
2671
2672 /*
2673 * First, we need to ensure that the iommu is enabled. This is
2674 * controlled by a register in the northbridge
2675 */
2676
2677 /* Select Northbridge indirect register 0x75 and enable writing */
2678 pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
2679 pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
2680
2681 /* Enable the iommu */
2682 if (!(ioc_feature_control & 0x1))
2683 pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
2684
2685 /* Restore the iommu BAR */
2686 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2687 iommu->stored_addr_lo);
2688 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
2689 iommu->stored_addr_hi);
2690
2691 /* Restore the l1 indirect regs for each of the 6 l1s */
2692 for (i = 0; i < 6; i++)
2693 for (j = 0; j < 0x12; j++)
2694 iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
2695
2696 /* Restore the l2 indirect regs */
2697 for (i = 0; i < 0x83; i++)
2698 iommu_write_l2(iommu, i, iommu->stored_l2[i]);
2699
2700 /* Lock PCI setup registers */
2701 pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2702 iommu->stored_addr_lo | 1);
2703 }
2704
2705 static void iommu_enable_ga(struct amd_iommu *iommu)
2706 {
2707 #ifdef CONFIG_IRQ_REMAP
2708 switch (amd_iommu_guest_ir) {
2709 case AMD_IOMMU_GUEST_IR_VAPIC:
2710 case AMD_IOMMU_GUEST_IR_LEGACY_GA:
2711 iommu_feature_enable(iommu, CONTROL_GA_EN);
2712 iommu->irte_ops = &irte_128_ops;
2713 break;
2714 default:
2715 iommu->irte_ops = &irte_32_ops;
2716 break;
2717 }
2718 #endif
2719 }
2720
2721 static void iommu_disable_irtcachedis(struct amd_iommu *iommu)
2722 {
2723 iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
2724 }
2725
2726 static void iommu_enable_irtcachedis(struct amd_iommu *iommu)
2727 {
2728 u64 ctrl;
2729
2730 if (!amd_iommu_irtcachedis)
2731 return;
2732
2733 /*
2734 * Note:
2735 * The support for the IRTCacheDis feature is determined by
2736 * checking if the bit is writable.
2737 */
2738 iommu_feature_enable(iommu, CONTROL_IRTCACHEDIS);
2739 ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
2740 ctrl &= (1ULL << CONTROL_IRTCACHEDIS);
2741 if (ctrl)
2742 iommu->irtcachedis_enabled = true;
2743 pr_info("iommu%d (%#06x) : IRT cache is %s\n",
2744 iommu->index, iommu->devid,
2745 iommu->irtcachedis_enabled ? "disabled" : "enabled");
2746 }
2747
2748 static void early_enable_iommu(struct amd_iommu *iommu)
2749 {
2750 iommu_disable(iommu);
2751 iommu_init_flags(iommu);
2752 iommu_set_device_table(iommu);
2753 iommu_enable_command_buffer(iommu);
2754 iommu_enable_event_buffer(iommu);
2755 iommu_set_exclusion_range(iommu);
2756 iommu_enable_gt(iommu);
2757 iommu_enable_ga(iommu);
2758 iommu_enable_xt(iommu);
2759 iommu_enable_irtcachedis(iommu);
2760 iommu_enable(iommu);
2761 amd_iommu_flush_all_caches(iommu);
2762 }
2763
2764 /*
2765 * This function finally enables all IOMMUs found in the system after
2766 * they have been initialized.
2767 *
2768 * Or, if this is a kdump kernel and all IOMMUs are pre-enabled, try to
2769 * copy the old content of the device table entries. If that is not the
2770 * case or the copy failed, just continue as a normal kernel would.
2771 */
2772 static void early_enable_iommus(void)
2773 {
2774 struct amd_iommu *iommu;
2775 struct amd_iommu_pci_seg *pci_seg;
2776
2777 if (!copy_device_table()) {
2778 /*
2779 * If we get here because copying the device table from the old
2780 * kernel failed while all IOMMUs were pre-enabled, print an error
2781 * message and free the allocated old_dev_tbl_cpy.
2782 */
2783 if (amd_iommu_pre_enabled)
2784 pr_err("Failed to copy DEV table from previous kernel.\n");
2785
2786 for_each_pci_segment(pci_seg) {
2787 if (pci_seg->old_dev_tbl_cpy != NULL) {
2788 iommu_free_pages(pci_seg->old_dev_tbl_cpy,
2789 get_order(pci_seg->dev_table_size));
2790 pci_seg->old_dev_tbl_cpy = NULL;
2791 }
2792 }
2793
2794 for_each_iommu(iommu) {
2795 clear_translation_pre_enabled(iommu);
2796 early_enable_iommu(iommu);
2797 }
2798 } else {
2799 pr_info("Copied DEV table from previous kernel.\n");
2800
2801 for_each_pci_segment(pci_seg) {
2802 iommu_free_pages(pci_seg->dev_table,
2803 get_order(pci_seg->dev_table_size));
2804 pci_seg->dev_table = pci_seg->old_dev_tbl_cpy;
2805 }
2806
2807 for_each_iommu(iommu) {
2808 iommu_disable_command_buffer(iommu);
2809 iommu_disable_event_buffer(iommu);
2810 iommu_disable_irtcachedis(iommu);
2811 iommu_enable_command_buffer(iommu);
2812 iommu_enable_event_buffer(iommu);
2813 iommu_enable_ga(iommu);
2814 iommu_enable_xt(iommu);
2815 iommu_enable_irtcachedis(iommu);
2816 iommu_set_device_table(iommu);
2817 amd_iommu_flush_all_caches(iommu);
2818 }
2819 }
2820 }
2821
2822 static void enable_iommus_ppr(void)
2823 {
2824 struct amd_iommu *iommu;
2825
2826 if (!amd_iommu_gt_ppr_supported())
2827 return;
2828
2829 for_each_iommu(iommu)
2830 amd_iommu_enable_ppr_log(iommu);
2831 }
2832
2833 static void enable_iommus_vapic(void)
2834 {
2835 #ifdef CONFIG_IRQ_REMAP
2836 u32 status, i;
2837 struct amd_iommu *iommu;
2838
2839 for_each_iommu(iommu) {
2840 /*
2841 * Disable GALog if already running. It could have been enabled
2842 * in the previous boot before kdump.
2843 */
2844 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
2845 if (!(status & MMIO_STATUS_GALOG_RUN_MASK))
2846 continue;
2847
2848 iommu_feature_disable(iommu, CONTROL_GALOG_EN);
2849 iommu_feature_disable(iommu, CONTROL_GAINT_EN);
2850
2851 /*
2852 * Need to poll and wait for the GALOGRun bit to clear before
2853 * we can safely set/modify the GA Log registers.
2854 */
2855 for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) {
2856 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
2857 if (!(status & MMIO_STATUS_GALOG_RUN_MASK))
2858 break;
2859 udelay(10);
2860 }
2861
2862 if (WARN_ON(i >= MMIO_STATUS_TIMEOUT))
2863 return;
2864 }
2865
2866 if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
2867 !check_feature(FEATURE_GAM_VAPIC)) {
2868 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
2869 return;
2870 }
2871
2872 if (amd_iommu_snp_en &&
2873 !FEATURE_SNPAVICSUP_GAM(amd_iommu_efr2)) {
2874 pr_warn("Force to disable Virtual APIC due to SNP\n");
2875 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
2876 return;
2877 }
2878
2879 /* Enabling GAM and SNPAVIC support */
2880 for_each_iommu(iommu) {
2881 if (iommu_init_ga_log(iommu) ||
2882 iommu_ga_log_enable(iommu))
2883 return;
2884
2885 iommu_feature_enable(iommu, CONTROL_GAM_EN);
2886 if (amd_iommu_snp_en)
2887 iommu_feature_enable(iommu, CONTROL_SNPAVIC_EN);
2888 }
2889
2890 amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP);
2891 pr_info("Virtual APIC enabled\n");
2892 #endif
2893 }
2894
2895 static void disable_iommus(void)
2896 {
2897 struct amd_iommu *iommu;
2898
2899 for_each_iommu(iommu)
2900 iommu_disable(iommu);
2901
2902 #ifdef CONFIG_IRQ_REMAP
2903 if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
2904 amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP);
2905 #endif
2906 }
2907
2908 /*
2909 * Suspend/Resume support
2910 * disable suspend until real resume implemented
2911 */
2912
2913 static void amd_iommu_resume(void)
2914 {
2915 struct amd_iommu *iommu;
2916
2917 for_each_iommu(iommu)
2918 iommu_apply_resume_quirks(iommu);
2919
2920 /* re-load the hardware */
2921 for_each_iommu(iommu)
2922 early_enable_iommu(iommu);
2923
2924 amd_iommu_enable_interrupts();
2925 }
2926
2927 static int amd_iommu_suspend(void)
2928 {
2929 /* disable IOMMUs to go out of the way for BIOS */
2930 disable_iommus();
2931
2932 return 0;
2933 }
2934
2935 static struct syscore_ops amd_iommu_syscore_ops = {
2936 .suspend = amd_iommu_suspend,
2937 .resume = amd_iommu_resume,
2938 };
2939
2940 static void __init free_iommu_resources(void)
2941 {
2942 kmem_cache_destroy(amd_iommu_irq_cache);
2943 amd_iommu_irq_cache = NULL;
2944
2945 free_iommu_all();
2946 free_pci_segments();
2947 }
2948
2949 /* SB IOAPIC is always on this device in AMD systems */
2950 #define IOAPIC_SB_DEVID ((0x00 << 8) | PCI_DEVFN(0x14, 0))
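/* i.e. bus 0x00, device 0x14, function 0, which packs to devid 0xa0 */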
2951
2952 static bool __init check_ioapic_information(void)
2953 {
2954 const char *fw_bug = FW_BUG;
2955 bool ret, has_sb_ioapic;
2956 int idx;
2957
2958 has_sb_ioapic = false;
2959 ret = false;
2960
2961 /*
2962 * If we have map overrides on the kernel command line the
2963 * messages in this function might not describe firmware bugs
2964 * anymore - so be careful
2965 */
2966 if (cmdline_maps)
2967 fw_bug = "";
2968
2969 for (idx = 0; idx < nr_ioapics; idx++) {
2970 int devid, id = mpc_ioapic_id(idx);
2971
2972 devid = get_ioapic_devid(id);
2973 if (devid < 0) {
2974 pr_err("%s: IOAPIC[%d] not in IVRS table\n",
2975 fw_bug, id);
2976 ret = false;
2977 } else if (devid == IOAPIC_SB_DEVID) {
2978 has_sb_ioapic = true;
2979 ret = true;
2980 }
2981 }
2982
2983 if (!has_sb_ioapic) {
2984 /*
2985 * We expect the SB IOAPIC to be listed in the IVRS
2986 * table. The system timer is connected to the SB IOAPIC
2987 * and if we don't have it in the list the system will
2988 * panic at boot time. This situation usually happens
2989 * when the BIOS is buggy and provides us the wrong
2990 * device id for the IOAPIC in the system.
2991 */
2992 pr_err("%s: No southbridge IOAPIC found\n", fw_bug);
2993 }
2994
2995 if (!ret)
2996 pr_err("Disabling interrupt remapping\n");
2997
2998 return ret;
2999 }
3000
3001 static void __init free_dma_resources(void)
3002 {
3003 ida_destroy(&pdom_ids);
3004
3005 free_unity_maps();
3006 }
3007
3008 static void __init ivinfo_init(void *ivrs)
3009 {
3010 amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET));
3011 }
3012
3013 /*
3014 * This is the hardware init function for AMD IOMMU in the system.
3015 * This function is called either from amd_iommu_init or from the interrupt
3016 * remapping setup code.
3017 *
3018 * This function basically parses the ACPI table for AMD IOMMU (IVRS)
3019 * four times:
3020 *
3021 * 1 pass) Discover the most comprehensive IVHD type to use.
3022 *
3023 * 2 pass) Find the highest PCI device id the driver has to handle.
3024 * Upon this information the size of the data structures is
3025 * determined that needs to be allocated.
3026 *
3027 * 3 pass) Initialize the data structures just allocated with the
3028 * information in the ACPI table about available AMD IOMMUs
3029 * in the system. It also maps the PCI devices in the
3030 * system to specific IOMMUs
3031 *
3032 * 4 pass) After the basic data structures are allocated and
3033 * initialized we update them with information about memory
3034 * remapping requirements parsed out of the ACPI table in
3035 * this last pass.
3036 *
3037 * After everything is set up the IOMMUs are enabled and the necessary
3038 * hotplug and suspend notifiers are registered.
3039 */
3040 static int __init early_amd_iommu_init(void)
3041 {
3042 struct acpi_table_header *ivrs_base;
3043 int remap_cache_sz, ret;
3044 acpi_status status;
3045
3046 if (!amd_iommu_detected)
3047 return -ENODEV;
3048
3049 status = acpi_get_table("IVRS", 0, &ivrs_base);
3050 if (status == AE_NOT_FOUND)
3051 return -ENODEV;
3052 else if (ACPI_FAILURE(status)) {
3053 const char *err = acpi_format_exception(status);
3054 pr_err("IVRS table error: %s\n", err);
3055 return -EINVAL;
3056 }
3057
3058 if (!boot_cpu_has(X86_FEATURE_CX16)) {
3059 pr_err("Failed to initialize. The CMPXCHG16B feature is required.\n");
3060 return -EINVAL;
3061 }
3062
3063 /*
3064 * Validate checksum here so we don't need to do it when
3065 * we actually parse the table
3066 */
3067 ret = check_ivrs_checksum(ivrs_base);
3068 if (ret)
3069 goto out;
3070
3071 ivinfo_init(ivrs_base);
3072
3073 amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
3074 DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);
3075
3076 /*
3077 * now the data structures are allocated and basically initialized
3078 * start the real acpi table scan
3079 */
3080 ret = init_iommu_all(ivrs_base);
3081 if (ret)
3082 goto out;
3083
3084 /* 5 level guest page table */
3085 if (cpu_feature_enabled(X86_FEATURE_LA57) &&
3086 FIELD_GET(FEATURE_GATS, amd_iommu_efr) == GUEST_PGTABLE_5_LEVEL)
3087 amd_iommu_gpt_level = PAGE_MODE_5_LEVEL;
3088
3089 if (amd_iommu_pgtable == PD_MODE_V2) {
3090 if (!amd_iommu_v2_pgtbl_supported()) {
3091 pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n");
3092 amd_iommu_pgtable = PD_MODE_V1;
3093 }
3094 }
3095
3096 /* Disable any previously enabled IOMMUs */
3097 if (!is_kdump_kernel() || amd_iommu_disabled)
3098 disable_iommus();
3099
3100 if (amd_iommu_irq_remap)
3101 amd_iommu_irq_remap = check_ioapic_information();
3102
3103 if (amd_iommu_irq_remap) {
3104 struct amd_iommu_pci_seg *pci_seg;
3105 /*
3106 * Interrupt remapping enabled, create kmem_cache for the
3107 * remapping tables.
3108 */
3109 ret = -ENOMEM;
3110 if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
3111 remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32);
3112 else
3113 remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2);
3114 amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache",
3115 remap_cache_sz,
3116 DTE_INTTAB_ALIGNMENT,
3117 0, NULL);
3118 if (!amd_iommu_irq_cache)
3119 goto out;
3120
3121 for_each_pci_segment(pci_seg) {
3122 if (alloc_irq_lookup_table(pci_seg))
3123 goto out;
3124 }
3125 }
3126
3127 ret = init_memory_definitions(ivrs_base);
3128 if (ret)
3129 goto out;
3130
3131 /* init the device table */
3132 init_device_table();
3133
3134 out:
3135 /* Don't leak any ACPI memory */
3136 acpi_put_table(ivrs_base);
3137
3138 return ret;
3139 }
3140
3141 static int amd_iommu_enable_interrupts(void)
3142 {
3143 struct amd_iommu *iommu;
3144 int ret = 0;
3145
3146 for_each_iommu(iommu) {
3147 ret = iommu_init_irq(iommu);
3148 if (ret)
3149 goto out;
3150 }
3151
3152 /*
3153 * Interrupt handler is ready to process interrupts. Enable
3154 * PPR and GA log interrupt for all IOMMUs.
3155 */
3156 enable_iommus_vapic();
3157 enable_iommus_ppr();
3158
3159 out:
3160 return ret;
3161 }
3162
3163 static bool __init detect_ivrs(void)
3164 {
3165 struct acpi_table_header *ivrs_base;
3166 acpi_status status;
3167 int i;
3168
3169 status = acpi_get_table("IVRS", 0, &ivrs_base);
3170 if (status == AE_NOT_FOUND)
3171 return false;
3172 else if (ACPI_FAILURE(status)) {
3173 const char *err = acpi_format_exception(status);
3174 pr_err("IVRS table error: %s\n", err);
3175 return false;
3176 }
3177
3178 acpi_put_table(ivrs_base);
3179
3180 if (amd_iommu_force_enable)
3181 goto out;
3182
3183 /* Don't use IOMMU if there is Stoney Ridge graphics */
3184 for (i = 0; i < 32; i++) {
3185 u32 pci_id;
3186
3187 pci_id = read_pci_config(0, i, 0, 0);
3188 if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) {
3189 pr_info("Disable IOMMU on Stoney Ridge\n");
3190 return false;
3191 }
3192 }
3193
3194 out:
3195 /* Make sure ACS will be enabled during PCI probe */
3196 pci_request_acs();
3197
3198 return true;
3199 }
3200
3201 static __init void iommu_snp_enable(void)
3202 {
3203 #ifdef CONFIG_KVM_AMD_SEV
3204 if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
3205 return;
3206 /*
3207 * SNP support requires that the IOMMU is enabled and configured
3208 * with a V1 page table (DTE[Mode] = 0 is not supported).
3209 */
3210 if (no_iommu || iommu_default_passthrough()) {
3211 pr_warn("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n");
3212 goto disable_snp;
3213 }
3214
3215 if (amd_iommu_pgtable != PD_MODE_V1) {
3216 pr_warn("SNP: IOMMU is configured with V2 page table mode, SNP cannot be supported.\n");
3217 goto disable_snp;
3218 }
3219
3220 amd_iommu_snp_en = check_feature(FEATURE_SNP);
3221 if (!amd_iommu_snp_en) {
3222 pr_warn("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n");
3223 goto disable_snp;
3224 }
3225
3226 /*
3227 * Enable host SNP support once SNP support is checked on IOMMU.
3228 */
3229 if (snp_rmptable_init()) {
3230 pr_warn("SNP: RMP initialization failed, SNP cannot be supported.\n");
3231 goto disable_snp;
3232 }
3233
3234 pr_info("IOMMU SNP support enabled.\n");
3235 return;
3236
3237 disable_snp:
3238 cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
3239 #endif
3240 }
3241
3242 /****************************************************************************
3243 *
3244 * AMD IOMMU Initialization State Machine
3245 *
3246 ****************************************************************************/
3247
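/*
 * On the success path the state machine below advances through:
 *
 *   IOMMU_START_STATE -> IOMMU_IVRS_DETECTED -> IOMMU_ACPI_FINISHED ->
 *   IOMMU_ENABLED -> IOMMU_PCI_INIT -> IOMMU_INTERRUPTS_EN ->
 *   IOMMU_INITIALIZED
 *
 * IOMMU_NOT_FOUND, IOMMU_CMDLINE_DISABLED and IOMMU_INIT_ERROR are
 * terminal error states.
 */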
3248 static int __init state_next(void)
3249 {
3250 int ret = 0;
3251
3252 switch (init_state) {
3253 case IOMMU_START_STATE:
3254 if (!detect_ivrs()) {
3255 init_state = IOMMU_NOT_FOUND;
3256 ret = -ENODEV;
3257 } else {
3258 init_state = IOMMU_IVRS_DETECTED;
3259 }
3260 break;
3261 case IOMMU_IVRS_DETECTED:
3262 if (amd_iommu_disabled) {
3263 init_state = IOMMU_CMDLINE_DISABLED;
3264 ret = -EINVAL;
3265 } else {
3266 ret = early_amd_iommu_init();
3267 init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
3268 }
3269 break;
3270 case IOMMU_ACPI_FINISHED:
3271 early_enable_iommus();
3272 x86_platform.iommu_shutdown = disable_iommus;
3273 init_state = IOMMU_ENABLED;
3274 break;
3275 case IOMMU_ENABLED:
3276 register_syscore_ops(&amd_iommu_syscore_ops);
3277 iommu_snp_enable();
3278 ret = amd_iommu_init_pci();
3279 init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
3280 break;
3281 case IOMMU_PCI_INIT:
3282 ret = amd_iommu_enable_interrupts();
3283 init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN;
3284 break;
3285 case IOMMU_INTERRUPTS_EN:
3286 init_state = IOMMU_INITIALIZED;
3287 break;
3288 case IOMMU_INITIALIZED:
3289 /* Nothing to do */
3290 break;
3291 case IOMMU_NOT_FOUND:
3292 case IOMMU_INIT_ERROR:
3293 case IOMMU_CMDLINE_DISABLED:
3294 /* Error states => do nothing */
3295 ret = -EINVAL;
3296 break;
3297 default:
3298 /* Unknown state */
3299 BUG();
3300 }
3301
3302 if (ret) {
3303 free_dma_resources();
3304 if (!irq_remapping_enabled) {
3305 disable_iommus();
3306 free_iommu_resources();
3307 } else {
3308 struct amd_iommu *iommu;
3309 struct amd_iommu_pci_seg *pci_seg;
3310
3311 for_each_pci_segment(pci_seg)
3312 uninit_device_table_dma(pci_seg);
3313
3314 for_each_iommu(iommu)
3315 amd_iommu_flush_all_caches(iommu);
3316 }
3317 }
3318 return ret;
3319 }
3320
3321 static int __init iommu_go_to_state(enum iommu_init_state state)
3322 {
3323 int ret = -EINVAL;
3324
3325 while (init_state != state) {
3326 if (init_state == IOMMU_NOT_FOUND ||
3327 init_state == IOMMU_INIT_ERROR ||
3328 init_state == IOMMU_CMDLINE_DISABLED)
3329 break;
3330 ret = state_next();
3331 }
3332
3333 /*
3334 * SNP platform initialization requires IOMMUs to be fully configured.
3335 * If the SNP support on IOMMUs has NOT been checked, simply mark SNP
3336 * as unsupported. If the SNP support on IOMMUs has been checked and
3337 * host SNP support enabled but RMP enforcement has not been enabled
3338 * in IOMMUs, then the system is in a half-baked state, but can limp
3339 * along as all memory should be Hypervisor-Owned in the RMP. WARN,
3340 * but leave SNP as "supported" to avoid confusing the kernel.
3341 */
3342 if (ret && cc_platform_has(CC_ATTR_HOST_SEV_SNP) &&
3343 !WARN_ON_ONCE(amd_iommu_snp_en))
3344 cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
3345
3346 return ret;
3347 }
3348
3349 #ifdef CONFIG_IRQ_REMAP
3350 int __init amd_iommu_prepare(void)
3351 {
3352 int ret;
3353
3354 amd_iommu_irq_remap = true;
3355
3356 ret = iommu_go_to_state(IOMMU_ACPI_FINISHED);
3357 if (ret) {
3358 amd_iommu_irq_remap = false;
3359 return ret;
3360 }
3361
3362 return amd_iommu_irq_remap ? 0 : -ENODEV;
3363 }
3364
3365 int __init amd_iommu_enable(void)
3366 {
3367 int ret;
3368
3369 ret = iommu_go_to_state(IOMMU_ENABLED);
3370 if (ret)
3371 return ret;
3372
3373 irq_remapping_enabled = 1;
3374 return amd_iommu_xt_mode;
3375 }
3376
3377 void amd_iommu_disable(void)
3378 {
3379 amd_iommu_suspend();
3380 }
3381
3382 int amd_iommu_reenable(int mode)
3383 {
3384 amd_iommu_resume();
3385
3386 return 0;
3387 }
3388
3389 int amd_iommu_enable_faulting(unsigned int cpu)
3390 {
3391 /* We enable MSI later when PCI is initialized */
3392 return 0;
3393 }
3394 #endif
3395
3396 /*
3397 * This is the core init function for AMD IOMMU hardware in the system.
3398 * This function is called from the generic x86 DMA layer initialization
3399 * code.
3400 */
3401 static int __init amd_iommu_init(void)
3402 {
3403 struct amd_iommu *iommu;
3404 int ret;
3405
3406 ret = iommu_go_to_state(IOMMU_INITIALIZED);
3407 #ifdef CONFIG_GART_IOMMU
3408 if (ret && list_empty(&amd_iommu_list)) {
3409 /*
3410 * We failed to initialize the AMD IOMMU - try fallback
3411 * to GART if possible.
3412 */
3413 gart_iommu_init();
3414 }
3415 #endif
3416
3417 for_each_iommu(iommu)
3418 amd_iommu_debugfs_setup(iommu);
3419
3420 return ret;
3421 }
3422
3423 static bool amd_iommu_sme_check(void)
3424 {
3425 if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT) ||
3426 (boot_cpu_data.x86 != 0x17))
3427 return true;
3428
3429 /* For Fam17h, a specific level of support is required */
3430 if (boot_cpu_data.microcode >= 0x08001205)
3431 return true;
3432
3433 if ((boot_cpu_data.microcode >= 0x08001126) &&
3434 (boot_cpu_data.microcode <= 0x080011ff))
3435 return true;
3436
3437 pr_notice("IOMMU not currently supported when SME is active\n");
3438
3439 return false;
3440 }
3441
3442 /****************************************************************************
3443 *
3444 * Early detect code. This code runs at IOMMU detection time in the DMA
3445 * layer. It just looks if there is an IVRS ACPI table to detect AMD
3446 * IOMMUs
3447 *
3448 ****************************************************************************/
3449 void __init amd_iommu_detect(void)
3450 {
3451 int ret;
3452
3453 if (no_iommu || (iommu_detected && !gart_iommu_aperture))
3454 goto disable_snp;
3455
3456 if (!amd_iommu_sme_check())
3457 goto disable_snp;
3458
3459 ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
3460 if (ret)
3461 goto disable_snp;
3462
3463 amd_iommu_detected = true;
3464 iommu_detected = 1;
3465 x86_init.iommu.iommu_init = amd_iommu_init;
3466 return;
3467
3468 disable_snp:
3469 if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
3470 cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
3471 }
3472
3473 /****************************************************************************
3474 *
3475 * Parsing functions for the AMD IOMMU specific kernel command line
3476 * options.
3477 *
3478 ****************************************************************************/
3479
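/*
 * The options recognized by the __setup() handlers below are, for example:
 *
 *   amd_iommu_dump
 *   amd_iommu=fullflush,off,force_enable,force_isolation,pgtbl_v1,
 *             pgtbl_v2,irtcachedis,nohugepages,v2_pgsizes_only
 *   amd_iommu_intr=legacy|vapic
 *   ivrs_ioapic=<id>@<seg>:<bus>:<dev>.<fn>
 *   ivrs_hpet=<id>@<seg>:<bus>:<dev>.<fn>
 *   ivrs_acpihid=<hid>:<uid>@<seg>:<bus>:<dev>.<fn>
 */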
3480 static int __init parse_amd_iommu_dump(char *str)
3481 {
3482 amd_iommu_dump = true;
3483
3484 return 1;
3485 }
3486
3487 static int __init parse_amd_iommu_intr(char *str)
3488 {
3489 for (; *str; ++str) {
3490 if (strncmp(str, "legacy", 6) == 0) {
3491 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
3492 break;
3493 }
3494 if (strncmp(str, "vapic", 5) == 0) {
3495 amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
3496 break;
3497 }
3498 }
3499 return 1;
3500 }
3501
3502 static int __init parse_amd_iommu_options(char *str)
3503 {
3504 if (!str)
3505 return -EINVAL;
3506
3507 while (*str) {
3508 if (strncmp(str, "fullflush", 9) == 0) {
3509 pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n");
3510 iommu_set_dma_strict();
3511 } else if (strncmp(str, "force_enable", 12) == 0) {
3512 amd_iommu_force_enable = true;
3513 } else if (strncmp(str, "off", 3) == 0) {
3514 amd_iommu_disabled = true;
3515 } else if (strncmp(str, "force_isolation", 15) == 0) {
3516 amd_iommu_force_isolation = true;
3517 } else if (strncmp(str, "pgtbl_v1", 8) == 0) {
3518 amd_iommu_pgtable = PD_MODE_V1;
3519 } else if (strncmp(str, "pgtbl_v2", 8) == 0) {
3520 amd_iommu_pgtable = PD_MODE_V2;
3521 } else if (strncmp(str, "irtcachedis", 11) == 0) {
3522 amd_iommu_irtcachedis = true;
3523 } else if (strncmp(str, "nohugepages", 11) == 0) {
3524 pr_info("Restricting V1 page-sizes to 4KiB");
3525 amd_iommu_pgsize_bitmap = AMD_IOMMU_PGSIZES_4K;
3526 } else if (strncmp(str, "v2_pgsizes_only", 15) == 0) {
3527 pr_info("Restricting V1 page-sizes to 4KiB/2MiB/1GiB");
3528 amd_iommu_pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
3529 } else {
3530 pr_notice("Unknown option - '%s'\n", str);
3531 }
3532
3533 str += strcspn(str, ",");
3534 while (*str == ',')
3535 str++;
3536 }
3537
3538 return 1;
3539 }
3540
3541 static int __init parse_ivrs_ioapic(char *str)
3542 {
3543 u32 seg = 0, bus, dev, fn;
3544 int id, i;
3545 u32 devid;
3546
3547 if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3548 sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
3549 goto found;
3550
3551 if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3552 sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
3553 pr_warn("ivrs_ioapic%s option format deprecated; use ivrs_ioapic=%d@%04x:%02x:%02x.%d instead\n",
3554 str, id, seg, bus, dev, fn);
3555 goto found;
3556 }
3557
3558 pr_err("Invalid command line: ivrs_ioapic%s\n", str);
3559 return 1;
3560
3561 found:
3562 if (early_ioapic_map_size == EARLY_MAP_SIZE) {
3563 pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
3564 str);
3565 return 1;
3566 }
3567
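/* e.g. ivrs_ioapic=32@0000:00:14.0 packs to devid 0xa0 in segment 0 */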
3568 devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3569
3570 cmdline_maps = true;
3571 i = early_ioapic_map_size++;
3572 early_ioapic_map[i].id = id;
3573 early_ioapic_map[i].devid = devid;
3574 early_ioapic_map[i].cmd_line = true;
3575
3576 return 1;
3577 }
3578
3579 static int __init parse_ivrs_hpet(char *str)
3580 {
3581 u32 seg = 0, bus, dev, fn;
3582 int id, i;
3583 u32 devid;
3584
3585 if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3586 sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
3587 goto found;
3588
3589 if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
3590 sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
3591 pr_warn("ivrs_hpet%s option format deprecated; use ivrs_hpet=%d@%04x:%02x:%02x.%d instead\n",
3592 str, id, seg, bus, dev, fn);
3593 goto found;
3594 }
3595
3596 pr_err("Invalid command line: ivrs_hpet%s\n", str);
3597 return 1;
3598
3599 found:
3600 if (early_hpet_map_size == EARLY_MAP_SIZE) {
3601 pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n",
3602 str);
3603 return 1;
3604 }
3605
3606 devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3607
3608 cmdline_maps = true;
3609 i = early_hpet_map_size++;
3610 early_hpet_map[i].id = id;
3611 early_hpet_map[i].devid = devid;
3612 early_hpet_map[i].cmd_line = true;
3613
3614 return 1;
3615 }
3616
3617 #define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN)
3618
3619 static int __init parse_ivrs_acpihid(char *str)
3620 {
3621 u32 seg = 0, bus, dev, fn;
3622 char *hid, *uid, *p, *addr;
3623 char acpiid[ACPIID_LEN] = {0};
3624 int i;
3625
3626 addr = strchr(str, '@');
3627 if (!addr) {
3628 addr = strchr(str, '=');
3629 if (!addr)
3630 goto not_found;
3631
3632 ++addr;
3633
3634 if (strlen(addr) > ACPIID_LEN)
3635 goto not_found;
3636
3637 if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 ||
3638 sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) {
3639 pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n",
3640 str, acpiid, seg, bus, dev, fn);
3641 goto found;
3642 }
3643 goto not_found;
3644 }
3645
3646 /* We have the '@', make it the terminator to get just the acpiid */
3647 *addr++ = 0;
3648
3649 if (strlen(str) > ACPIID_LEN + 1)
3650 goto not_found;
3651
3652 if (sscanf(str, "=%s", acpiid) != 1)
3653 goto not_found;
3654
3655 if (sscanf(addr, "%x:%x.%x", &bus, &dev, &fn) == 3 ||
3656 sscanf(addr, "%x:%x:%x.%x", &seg, &bus, &dev, &fn) == 4)
3657 goto found;
3658
3659 not_found:
3660 pr_err("Invalid command line: ivrs_acpihid%s\n", str);
3661 return 1;
3662
3663 found:
3664 p = acpiid;
3665 hid = strsep(&p, ":");
3666 uid = p;
3667
3668 if (!hid || !(*hid) || !uid) {
3669 pr_err("Invalid command line: hid or uid\n");
3670 return 1;
3671 }
3672
3673 /*
3674 * Ignore leading zeroes after ':', so e.g., AMDI0095:00
3675 * will match AMDI0095:0 in the second strcmp in acpi_dev_hid_uid_match
3676 */
3677 while (*uid == '0' && *(uid + 1))
3678 uid++;
3679
3680 i = early_acpihid_map_size++;
3681 memcpy(early_acpihid_map[i].hid, hid, strlen(hid));
3682 memcpy(early_acpihid_map[i].uid, uid, strlen(uid));
3683 early_acpihid_map[i].devid = IVRS_GET_SBDF_ID(seg, bus, dev, fn);
3684 early_acpihid_map[i].cmd_line = true;
3685
3686 return 1;
3687 }
3688
3689 __setup("amd_iommu_dump", parse_amd_iommu_dump);
3690 __setup("amd_iommu=", parse_amd_iommu_options);
3691 __setup("amd_iommu_intr=", parse_amd_iommu_intr);
3692 __setup("ivrs_ioapic", parse_ivrs_ioapic);
3693 __setup("ivrs_hpet", parse_ivrs_hpet);
3694 __setup("ivrs_acpihid", parse_ivrs_acpihid);
3695
3696 bool amd_iommu_pasid_supported(void)
3697 {
3698 /* CPU page table size should match IOMMU guest page table size */
3699 if (cpu_feature_enabled(X86_FEATURE_LA57) &&
3700 amd_iommu_gpt_level != PAGE_MODE_5_LEVEL)
3701 return false;
3702
3703 /*
3704 * Since DTE[Mode]=0 is prohibited on SNP-enabled system
3705 * (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without
3706 * setting up IOMMUv1 page table.
3707 */
3708 return amd_iommu_gt_ppr_supported() && !amd_iommu_snp_en;
3709 }
3710
3711 struct amd_iommu *get_amd_iommu(unsigned int idx)
3712 {
3713 unsigned int i = 0;
3714 struct amd_iommu *iommu;
3715
3716 for_each_iommu(iommu)
3717 if (i++ == idx)
3718 return iommu;
3719 return NULL;
3720 }
3721
3722 /****************************************************************************
3723 *
3724 * IOMMU EFR Performance Counter support functionality. This code allows
3725 * access to the IOMMU PC functionality.
3726 *
3727 ****************************************************************************/
3728
3729 u8 amd_iommu_pc_get_max_banks(unsigned int idx)
3730 {
3731 struct amd_iommu *iommu = get_amd_iommu(idx);
3732
3733 if (iommu)
3734 return iommu->max_banks;
3735
3736 return 0;
3737 }
3738
3739 bool amd_iommu_pc_supported(void)
3740 {
3741 return amd_iommu_pc_present;
3742 }
3743
3744 u8 amd_iommu_pc_get_max_counters(unsigned int idx)
3745 {
3746 struct amd_iommu *iommu = get_amd_iommu(idx);
3747
3748 if (iommu)
3749 return iommu->max_counters;
3750
3751 return 0;
3752 }
3753
3754 static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
3755 u8 fxn, u64 *value, bool is_write)
3756 {
3757 u32 offset;
3758 u32 max_offset_lim;
3759
3760 /* Make sure the IOMMU PC resource is available */
3761 if (!amd_iommu_pc_present)
3762 return -ENODEV;
3763
3764 /* Check for valid iommu and pc register indexing */
3765 if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7)))
3766 return -ENODEV;
3767
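/*
 * Counter registers live at MMIO offset 0x40000 + (bank << 12) +
 * (cntr << 8) + fxn, which is what the encoding below and the
 * aperture limit check express.
 */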
3768 offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn);
3769
3770 /* Limit the offset to the hw defined mmio region aperture */
3771 max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) |
3772 (iommu->max_counters << 8) | 0x28);
3773 if ((offset < MMIO_CNTR_REG_OFFSET) ||
3774 (offset > max_offset_lim))
3775 return -EINVAL;
3776
3777 if (is_write) {
3778 u64 val = *value & GENMASK_ULL(47, 0);
3779
3780 writel((u32)val, iommu->mmio_base + offset);
3781 writel((val >> 32), iommu->mmio_base + offset + 4);
3782 } else {
3783 *value = readl(iommu->mmio_base + offset + 4);
3784 *value <<= 32;
3785 *value |= readl(iommu->mmio_base + offset);
3786 *value &= GENMASK_ULL(47, 0);
3787 }
3788
3789 return 0;
3790 }
3791
3792 int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3793 {
3794 if (!iommu)
3795 return -EINVAL;
3796
3797 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false);
3798 }
3799
3800 int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3801 {
3802 if (!iommu)
3803 return -EINVAL;
3804
3805 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
3806 }
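/*
 * A minimal usage sketch for the accessors above (hypothetical caller;
 * bank 0, counter 0 and register offset 0 are just example arguments):
 *
 *	u64 val = 0;
 *	struct amd_iommu *iommu = get_amd_iommu(0);
 *
 *	if (iommu && !amd_iommu_pc_get_reg(iommu, 0, 0, 0, &val))
 *		pr_info("IOMMU0 perf reg: %llu\n", val);
 */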
3807
3808 #ifdef CONFIG_KVM_AMD_SEV
3809 static int iommu_page_make_shared(void *page)
3810 {
3811 unsigned long paddr, pfn;
3812
3813 paddr = iommu_virt_to_phys(page);
3814 /* The C-bit may be set in the paddr */
3815 pfn = __sme_clr(paddr) >> PAGE_SHIFT;
3816
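/*
 * RMP huge entries are 2M sized and aligned, so only a PTRS_PER_PMD
 * aligned pfn can be the start of one; the RMP lookup and PSMASH
 * handling below is limited to those pfns.
 */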
3817 if (!(pfn % PTRS_PER_PMD)) {
3818 int ret, level;
3819 bool assigned;
3820
3821 ret = snp_lookup_rmpentry(pfn, &assigned, &level);
3822 if (ret) {
3823 pr_warn("IOMMU PFN %lx RMP lookup failed, ret %d\n", pfn, ret);
3824 return ret;
3825 }
3826
3827 if (!assigned) {
3828 pr_warn("IOMMU PFN %lx not assigned in RMP table\n", pfn);
3829 return -EINVAL;
3830 }
3831
3832 if (level > PG_LEVEL_4K) {
3833 ret = psmash(pfn);
3834 if (!ret)
3835 goto done;
3836
3837 pr_warn("PSMASH failed for IOMMU PFN %lx huge RMP entry, ret: %d, level: %d\n",
3838 pfn, ret, level);
3839 return ret;
3840 }
3841 }
3842
3843 done:
3844 return rmp_make_shared(pfn, PG_LEVEL_4K);
3845 }
3846
3847 static int iommu_make_shared(void *va, size_t size)
3848 {
3849 void *page;
3850 int ret;
3851
3852 if (!va)
3853 return 0;
3854
3855 for (page = va; page < (va + size); page += PAGE_SIZE) {
3856 ret = iommu_page_make_shared(page);
3857 if (ret)
3858 return ret;
3859 }
3860
3861 return 0;
3862 }
3863
3864 int amd_iommu_snp_disable(void)
3865 {
3866 struct amd_iommu *iommu;
3867 int ret;
3868
3869 if (!amd_iommu_snp_en)
3870 return 0;
3871
3872 for_each_iommu(iommu) {
3873 ret = iommu_make_shared(iommu->evt_buf, EVT_BUFFER_SIZE);
3874 if (ret)
3875 return ret;
3876
3877 ret = iommu_make_shared(iommu->ppr_log, PPR_LOG_SIZE);
3878 if (ret)
3879 return ret;
3880
3881 ret = iommu_make_shared((void *)iommu->cmd_sem, PAGE_SIZE);
3882 if (ret)
3883 return ret;
3884 }
3885
3886 return 0;
3887 }
3888 EXPORT_SYMBOL_GPL(amd_iommu_snp_disable);
3889 #endif
3890