/* SPDX-License-Identifier: GPL-2.0 */
/*
 * AMD Address Translation Library
 *
 * internal.h : Helper functions and common defines
 *
 * Copyright (c) 2023, Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Author: Yazen Ghannam <[email protected]>
 */
12
13 #ifndef __AMD_ATL_INTERNAL_H__
14 #define __AMD_ATL_INTERNAL_H__
15
16 #include <linux/bitfield.h>
17 #include <linux/bitops.h>
18 #include <linux/ras.h>
19
20 #include <asm/amd_nb.h>
21 #include <asm/amd_node.h>
22
23 #include "reg_fields.h"
24
/* Give every pr_*() format string in this library an "amd_atl: " prefix. */
#undef pr_fmt
#define pr_fmt(fmt) "amd_atl: " fmt

/* Maximum possible number of Coherent Stations within a single Data Fabric. */
#define MAX_COH_ST_CHANNELS		32

/* PCI ID for Zen4 Server DF Function 0. */
#define DF_FUNC0_ID_ZEN4_SERVER		0x14AD1022

/* PCI ID for MI300 DF Function 0. */
#define DF_FUNC0_ID_MI300		0x15281022

/* Shift needed for adjusting register values to true values. */
#define DF_DRAM_BASE_LIMIT_LSB		28
#define MI300_DRAM_LIMIT_LSB		20

/*
 * All-ones unsigned long long value which, going by its name, marks an
 * invalid SPA. Parenthesized so the expansion stays a single operand in
 * whatever expression it is used in.
 */
#define INVALID_SPA			(~0ULL)
42
/*
 * Data Fabric revisions.
 *
 * No explicit values are assigned, so the enumerators count up from
 * UNKNOWN = 0 in declaration order.
 */
enum df_revisions {
	UNKNOWN,
	DF2,
	DF3,
	DF3p5,
	DF4,
	DF4p5,
};
51
/*
 * Interleave modes.
 *
 * These are mapped 1:1 to the hardware values. Special cases are set at > 0x20.
 */
enum intlv_modes {
	NONE				= 0x00,
	NOHASH_2CHAN			= 0x01,
	NOHASH_4CHAN			= 0x03,
	NOHASH_8CHAN			= 0x05,
	DF3_6CHAN			= 0x06,
	NOHASH_16CHAN			= 0x07,
	NOHASH_32CHAN			= 0x08,
	DF3_COD4_2CHAN_HASH		= 0x0C,
	DF3_COD2_4CHAN_HASH		= 0x0D,
	DF3_COD1_8CHAN_HASH		= 0x0E,
	DF4_NPS4_2CHAN_HASH		= 0x10,
	DF4_NPS2_4CHAN_HASH		= 0x11,
	DF4_NPS1_8CHAN_HASH		= 0x12,
	DF4_NPS4_3CHAN_HASH		= 0x13,
	DF4_NPS2_6CHAN_HASH		= 0x14,
	DF4_NPS1_12CHAN_HASH		= 0x15,
	DF4_NPS2_5CHAN_HASH		= 0x16,
	DF4_NPS1_10CHAN_HASH		= 0x17,
	MI3_HASH_8CHAN			= 0x18,
	MI3_HASH_16CHAN			= 0x19,
	MI3_HASH_32CHAN			= 0x1A,
	DF2_2CHAN_HASH			= 0x21,
	/* DF4.5 modes are all IntLvNumChan + 0x20 */
	DF4p5_NPS1_16CHAN_1K_HASH	= 0x2C,
	DF4p5_NPS0_24CHAN_1K_HASH	= 0x2E,
	DF4p5_NPS4_2CHAN_1K_HASH	= 0x30,
	DF4p5_NPS2_4CHAN_1K_HASH	= 0x31,
	DF4p5_NPS1_8CHAN_1K_HASH	= 0x32,
	DF4p5_NPS4_3CHAN_1K_HASH	= 0x33,
	DF4p5_NPS2_6CHAN_1K_HASH	= 0x34,
	DF4p5_NPS1_12CHAN_1K_HASH	= 0x35,
	DF4p5_NPS2_5CHAN_1K_HASH	= 0x36,
	DF4p5_NPS1_10CHAN_1K_HASH	= 0x37,
	DF4p5_NPS4_2CHAN_2K_HASH	= 0x40,
	DF4p5_NPS2_4CHAN_2K_HASH	= 0x41,
	DF4p5_NPS1_8CHAN_2K_HASH	= 0x42,
	DF4p5_NPS1_16CHAN_2K_HASH	= 0x43,
	DF4p5_NPS4_3CHAN_2K_HASH	= 0x44,
	DF4p5_NPS2_6CHAN_2K_HASH	= 0x45,
	DF4p5_NPS1_12CHAN_2K_HASH	= 0x46,
	DF4p5_NPS0_24CHAN_2K_HASH	= 0x47,
	DF4p5_NPS2_5CHAN_2K_HASH	= 0x48,
	DF4p5_NPS1_10CHAN_2K_HASH	= 0x49,
};
98
99 struct df4p5_denorm_ctx {
100 /* Indicates the number of "lost" bits. This will be 1, 2, or 3. */
101 u8 perm_shift;
102
103 /* A mask indicating the bits that need to be rehashed. */
104 u16 rehash_vector;
105
106 /*
107 * Represents the value that the high bits of the normalized address
108 * are divided by during normalization. This value will be 3 for
109 * interleave modes with a number of channels divisible by 3 or the
110 * value will be 5 for interleave modes with a number of channels
111 * divisible by 5. Power-of-two interleave modes are handled
112 * separately.
113 */
114 u8 mod_value;
115
116 /*
117 * Represents the bits that can be directly pulled from the normalized
118 * address. In each case, pass through bits [7:0] of the normalized
119 * address. The other bits depend on the interleave bit position which
120 * will be bit 10 for 1K interleave stripe cases and bit 11 for 2K
121 * interleave stripe cases.
122 */
123 u64 base_denorm_addr;
124
125 /*
126 * Represents the high bits of the physical address that have been
127 * divided by the mod_value.
128 */
129 u64 div_addr;
130
131 u64 current_spa;
132 u64 resolved_spa;
133
134 u16 coh_st_fabric_id;
135 };
136
137 struct df_flags {
138 __u8 legacy_ficaa : 1,
139 socket_id_shift_quirk : 1,
140 heterogeneous : 1,
141 __reserved_0 : 5;
142 };
143
144 struct df_config {
145 enum df_revisions rev;
146
147 /*
148 * These masks operate on the 16-bit Coherent Station IDs,
149 * e.g. Instance, Fabric, Destination, etc.
150 */
151 u16 component_id_mask;
152 u16 die_id_mask;
153 u16 node_id_mask;
154 u16 socket_id_mask;
155
156 /*
157 * Least-significant bit of Node ID portion of the
158 * system-wide Coherent Station Fabric ID.
159 */
160 u8 node_id_shift;
161
162 /*
163 * Least-significant bit of Die portion of the Node ID.
164 * Adjusted to include the Node ID shift in order to apply
165 * to the Coherent Station Fabric ID.
166 */
167 u8 die_id_shift;
168
169 /*
170 * Least-significant bit of Socket portion of the Node ID.
171 * Adjusted to include the Node ID shift in order to apply
172 * to the Coherent Station Fabric ID.
173 */
174 u8 socket_id_shift;
175
176 /* Number of DRAM Address maps visible in a Coherent Station. */
177 u8 num_coh_st_maps;
178
179 u32 dram_hole_base;
180
181 /* Global flags to handle special cases. */
182 struct df_flags flags;
183 };
184
185 extern struct df_config df_cfg;
186
187 struct dram_addr_map {
188 /*
189 * Each DRAM Address Map can operate independently
190 * in different interleaving modes.
191 */
192 enum intlv_modes intlv_mode;
193
194 /* System-wide number for this address map. */
195 u8 num;
196
197 /* Raw register values */
198 u32 base;
199 u32 limit;
200 u32 ctl;
201 u32 intlv;
202
203 /*
204 * Logical to Physical Coherent Station Remapping array
205 *
206 * Index: Logical Coherent Station Instance ID
207 * Value: Physical Coherent Station Instance ID
208 *
209 * phys_coh_st_inst_id = remap_array[log_coh_st_inst_id]
210 */
211 u8 remap_array[MAX_COH_ST_CHANNELS];
212
213 /*
214 * Number of bits covering DRAM Address map 0
215 * when interleaving is non-power-of-2.
216 *
217 * Used only for DF3_6CHAN.
218 */
219 u8 np2_bits;
220
221 /* Position of the 'interleave bit'. */
222 u8 intlv_bit_pos;
223 /* Number of channels interleaved in this map. */
224 u8 num_intlv_chan;
225 /* Number of dies interleaved in this map. */
226 u8 num_intlv_dies;
227 /* Number of sockets interleaved in this map. */
228 u8 num_intlv_sockets;
229 /*
230 * Total number of channels interleaved accounting
231 * for die and socket interleaving.
232 */
233 u8 total_intlv_chan;
234 /* Total bits needed to cover 'total_intlv_chan'. */
235 u8 total_intlv_bits;
236 };
237
238 /* Original input values cached for debug printing. */
239 struct addr_ctx_inputs {
240 u64 norm_addr;
241 u8 socket_id;
242 u8 die_id;
243 u8 coh_st_inst_id;
244 };
245
246 struct addr_ctx {
247 u64 ret_addr;
248
249 struct addr_ctx_inputs inputs;
250 struct dram_addr_map map;
251
252 /* AMD Node ID calculated from Socket and Die IDs. */
253 u8 node_id;
254
255 /*
256 * Coherent Station Instance ID
257 * Local ID used within a 'node'.
258 */
259 u16 inst_id;
260
261 /*
262 * Coherent Station Fabric ID
263 * System-wide ID that includes 'node' bits.
264 */
265 u16 coh_st_fabric_id;
266 };
267
268 int df_indirect_read_instance(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo);
269 int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo);
270
271 int get_df_system_info(void);
272 int determine_node_id(struct addr_ctx *ctx, u8 socket_num, u8 die_num);
273 int get_umc_info_mi300(void);
274
275 int get_address_map(struct addr_ctx *ctx);
276
277 int denormalize_address(struct addr_ctx *ctx);
278 int dehash_address(struct addr_ctx *ctx);
279
280 unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr);
281 unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err);
282
283 u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr);
284 u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr);
285
286 #ifdef CONFIG_AMD_ATL_PRM
287 unsigned long prm_umc_norm_to_sys_addr(u8 socket_id, u64 umc_bank_inst_id, unsigned long addr);
288 #else
prm_umc_norm_to_sys_addr(u8 socket_id,u64 umc_bank_inst_id,unsigned long addr)289 static inline unsigned long prm_umc_norm_to_sys_addr(u8 socket_id, u64 umc_bank_inst_id,
290 unsigned long addr)
291 {
292 return -ENODEV;
293 }
294 #endif
295
296 /*
297 * Make a gap in @data that is @num_bits long starting at @bit_num.
298 * e.g. data = 11111111'b
299 * bit_num = 3
300 * num_bits = 2
301 * result = 1111100111'b
302 */
expand_bits(u8 bit_num,u8 num_bits,u64 data)303 static inline u64 expand_bits(u8 bit_num, u8 num_bits, u64 data)
304 {
305 u64 temp1, temp2;
306
307 if (!num_bits)
308 return data;
309
310 if (!bit_num) {
311 WARN_ON_ONCE(num_bits >= BITS_PER_LONG);
312 return data << num_bits;
313 }
314
315 WARN_ON_ONCE(bit_num >= BITS_PER_LONG);
316
317 temp1 = data & GENMASK_ULL(bit_num - 1, 0);
318
319 temp2 = data & GENMASK_ULL(63, bit_num);
320 temp2 <<= num_bits;
321
322 return temp1 | temp2;
323 }
324
325 /*
326 * Remove bits in @data between @low_bit and @high_bit inclusive.
327 * e.g. data = XXXYYZZZ'b
328 * low_bit = 3
329 * high_bit = 4
330 * result = XXXZZZ'b
331 */
remove_bits(u8 low_bit,u8 high_bit,u64 data)332 static inline u64 remove_bits(u8 low_bit, u8 high_bit, u64 data)
333 {
334 u64 temp1, temp2;
335
336 WARN_ON_ONCE(high_bit >= BITS_PER_LONG);
337 WARN_ON_ONCE(low_bit >= BITS_PER_LONG);
338 WARN_ON_ONCE(low_bit > high_bit);
339
340 if (!low_bit)
341 return data >> (high_bit++);
342
343 temp1 = GENMASK_ULL(low_bit - 1, 0) & data;
344 temp2 = GENMASK_ULL(63, high_bit + 1) & data;
345 temp2 >>= high_bit - low_bit + 1;
346
347 return temp1 | temp2;
348 }
349
/*
 * pr_debug() wrapper that prefixes the message with the original inputs cached
 * in @ctx->inputs: socket_id, die_id, coh_st_inst_id and norm_addr.
 */
#define atl_debug(ctx, fmt, arg...) \
	pr_debug("socket_id=%u die_id=%u coh_st_inst_id=%u norm_addr=0x%016llx: " fmt,\
		 (ctx)->inputs.socket_id, (ctx)->inputs.die_id,\
		 (ctx)->inputs.coh_st_inst_id, (ctx)->inputs.norm_addr, ##arg)
354
atl_debug_on_bad_df_rev(void)355 static inline void atl_debug_on_bad_df_rev(void)
356 {
357 pr_debug("Unrecognized DF rev: %u", df_cfg.rev);
358 }
359
atl_debug_on_bad_intlv_mode(struct addr_ctx * ctx)360 static inline void atl_debug_on_bad_intlv_mode(struct addr_ctx *ctx)
361 {
362 atl_debug(ctx, "Unrecognized interleave mode: %u", ctx->map.intlv_mode);
363 }
364
/*
 * Bit-field selectors: a mask covering bits [5:1], and bit 23 on its own.
 * NOTE(review): by their names these select the column bits and row bit 13
 * of an MI300 UMC MCA address; confirm against the code that uses them.
 */
#define MI300_UMC_MCA_COL	GENMASK(5, 1)
#define MI300_UMC_MCA_ROW13	BIT(23)
367
368 #endif /* __AMD_ATL_INTERNAL_H__ */
369