xref: /aosp_15_r20/external/mesa3d/src/intel/dev/intel_device_info.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2013 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30 
31 #include "util/libdrm.h"
32 
33 #include "intel_device_info.h"
34 #include "intel_wa.h"
35 #include "i915/intel_device_info.h"
36 #include "xe/intel_device_info.h"
37 
38 #include "common/intel_gem.h"
39 #include "util/u_debug.h"
40 #include "util/log.h"
41 #include "util/macros.h"
42 
/*
 * Table of short platform mnemonics and the PCI device ID reported for each.
 *
 * intel_device_name_to_pci_device_id() walks this table front to back and
 * compares names with strcmp(), so a mnemonic must match exactly and the
 * first matching row wins.
 */
static const struct {
   const char *name;   /* platform mnemonic, e.g. "skl" */
   int pci_id;         /* PCI device ID returned for that mnemonic */
} name_map[] = {
   { "lpt", 0x27a2 },
   { "brw", 0x2a02 },
   { "g4x", 0x2a42 },
   { "ilk", 0x0042 },
   { "snb", 0x0126 },
   { "ivb", 0x016a },
   { "hsw", 0x0d2e },
   { "byt", 0x0f33 },
   { "bdw", 0x162e },
   { "chv", 0x22b3 },
   { "skl", 0x1912 },
   { "bxt", 0x5a85 },
   { "kbl", 0x5912 },
   { "aml", 0x591c },
   { "glk", 0x3185 },
   { "cfl", 0x3e9b },
   { "whl", 0x3ea1 },
   { "cml", 0x9b41 },
   { "icl", 0x8a52 },
   { "ehl", 0x4571 },
   { "jsl", 0x4e71 },
   { "tgl", 0x9a49 },
   { "rkl", 0x4c8a },
   { "dg1", 0x4905 },
   { "adl", 0x4680 },
   { "sg1", 0x4907 },
   { "rpl", 0xa780 },
   { "dg2", 0x5690 },
   { "mtl", 0x7d60 },
   { "arl", 0x7d67 },
   { "lnl", 0x64a0 },
   { "bmg", 0xe202 },
};
80 
81 /**
82  * Get the PCI ID for the device name.
83  *
84  * Returns -1 if the device is not known.
85  */
86 int
intel_device_name_to_pci_device_id(const char * name)87 intel_device_name_to_pci_device_id(const char *name)
88 {
89    for (unsigned i = 0; i < ARRAY_SIZE(name_map); i++) {
90       if (!strcmp(name_map[i].name, name))
91          return name_map[i].pci_id;
92    }
93 
94    return -1;
95 }
96 
/*
 * Device description for INTEL_PLATFORM_GFX3 (ver 3): one slice holding one
 * subslice of 8 EUs, 4 threads per EU.  No per-stage thread limits or URB
 * parameters are listed, so those members stay zero-initialized.
 */
97 static const struct intel_device_info intel_device_info_gfx3 = {
98    .ver = 3,
99    .platform = INTEL_PLATFORM_GFX3,
100    .simulator_id = -1,
101    .num_slices = 1,
102    .num_subslices = { 1, },
103    .max_eus_per_subslice = 8,
104    .num_thread_per_eu = 4,
105    .grf_size = 32,
106    .timestamp_frequency = 12500000,
107 };
108 
/*
 * Device description for INTEL_PLATFORM_I965 (ver 4): one slice, one
 * subslice, 8 EUs with 4 threads each.
 *
 * max_wm_threads is written as 8 * 4, i.e. the max_eus_per_subslice and
 * num_thread_per_eu values of this same entry multiplied together.
 */
109 static const struct intel_device_info intel_device_info_i965 = {
110    .ver = 4,
111    .platform = INTEL_PLATFORM_I965,
112    .has_negative_rhw_bug = true,
113    .num_slices = 1,
114    .num_subslices = { 1, },
115    .max_eus_per_subslice = 8,
116    .num_thread_per_eu = 4,
117    .grf_size = 32,
118    .max_vs_threads = 16,
119    .max_gs_threads = 2,
120    .max_wm_threads = 8 * 4,
121    .urb = {
122       .size = 256,
123    },
124    .timestamp_frequency = 12500000,
125    .simulator_id = -1,
126 };
127 
/*
 * Device description for INTEL_PLATFORM_G4X (ver 4, verx10 45): one slice,
 * one subslice, 10 EUs with 5 threads each.
 *
 * max_wm_threads is written as 10 * 5, matching max_eus_per_subslice times
 * num_thread_per_eu of this entry.
 */
128 static const struct intel_device_info intel_device_info_g4x = {
129    .ver = 4,
130    .verx10 = 45,
131    .has_pln = true,
132    .has_compr4 = true,
133    .has_surface_tile_offset = true,
134    .platform = INTEL_PLATFORM_G4X,
135    .num_slices = 1,
136    .num_subslices = { 1, },
137    .max_eus_per_subslice = 10,
138    .num_thread_per_eu = 5,
139    .grf_size = 32,
140    .max_vs_threads = 32,
141    .max_gs_threads = 2,
142    .max_wm_threads = 10 * 5,
143    .urb = {
144       .size = 384,
145    },
146    .timestamp_frequency = 12500000,
147    .simulator_id = -1,
148 };
149 
/*
 * Device description for INTEL_PLATFORM_ILK (ver 5): one slice, one
 * subslice, 12 EUs with 6 threads each.
 *
 * max_wm_threads is written as 12 * 6, matching max_eus_per_subslice times
 * num_thread_per_eu of this entry.
 */
150 static const struct intel_device_info intel_device_info_ilk = {
151    .ver = 5,
152    .platform = INTEL_PLATFORM_ILK,
153    .has_pln = true,
154    .has_compr4 = true,
155    .has_surface_tile_offset = true,
156    .num_slices = 1,
157    .num_subslices = { 1, },
158    .max_eus_per_subslice = 12,
159    .num_thread_per_eu = 6,
160    .grf_size = 32,
161    .max_vs_threads = 72,
162    .max_gs_threads = 32,
163    .max_wm_threads = 12 * 6,
164    .urb = {
165       .size = 1024,
166    },
167    .timestamp_frequency = 12500000,
168    .simulator_id = -1,
169 };
170 
/*
 * Device description for INTEL_PLATFORM_SNB, gt 1 (ver 6): one slice, one
 * subslice of 6 EUs.  URB entry limits are given only for the vertex and
 * geometry stages; the remaining per-stage slots stay zero-initialized.
 */
171 static const struct intel_device_info intel_device_info_snb_gt1 = {
172    .ver = 6,
173    .gt = 1,
174    .platform = INTEL_PLATFORM_SNB,
175    .has_hiz_and_separate_stencil = true,
176    .has_llc = true,
177    .has_pln = true,
178    .has_surface_tile_offset = true,
179    .needs_unlit_centroid_workaround = true,
180    .num_slices = 1,
181    .num_subslices = { 1, },
182    .max_eus_per_subslice = 6,
183    .num_thread_per_eu = 6, /* Not confirmed */
184    .grf_size = 32,
185    .max_vs_threads = 24,
186    .max_gs_threads = 21, /* conservative; 24 if rendering disabled. */
187    .max_wm_threads = 40,
188    .urb = {
189       .size = 32,
190       .min_entries = {
191          [MESA_SHADER_VERTEX]   = 24,
192       },
193       .max_entries = {
194          [MESA_SHADER_VERTEX]   = 256,
195          [MESA_SHADER_GEOMETRY] = 256,
196       },
197    },
198    .timestamp_frequency = 12500000,
199    .simulator_id = -1,
200 };
201 
/*
 * Device description for INTEL_PLATFORM_SNB, gt 2 (ver 6).  Relative to
 * intel_device_info_snb_gt1 it doubles max_eus_per_subslice (12), the
 * urb.size value (64) and max_wm_threads (80), and raises the VS/GS thread
 * limits to 60; the feature flags and URB entry limits are the same.
 */
202 static const struct intel_device_info intel_device_info_snb_gt2 = {
203    .ver = 6,
204    .gt = 2,
205    .platform = INTEL_PLATFORM_SNB,
206    .has_hiz_and_separate_stencil = true,
207    .has_llc = true,
208    .has_pln = true,
209    .has_surface_tile_offset = true,
210    .needs_unlit_centroid_workaround = true,
211    .num_slices = 1,
212    .num_subslices = { 1, },
213    .max_eus_per_subslice = 12,
214    .num_thread_per_eu = 6, /* Not confirmed */
215    .grf_size = 32,
216    .max_vs_threads = 60,
217    .max_gs_threads = 60,
218    .max_wm_threads = 80,
219    .urb = {
220       .size = 64,
221       .min_entries = {
222          [MESA_SHADER_VERTEX]   = 24,
223       },
224       .max_entries = {
225          [MESA_SHADER_VERTEX]   = 256,
226          [MESA_SHADER_GEOMETRY] = 256,
227       },
228    },
229    .timestamp_frequency = 12500000,
230    .simulator_id = -1,
231 };
232 
/*
 * Designated-initializer fragment shared by the ver 7 entries below (IVB,
 * BYT and, through HSW_FEATURES, HSW).  It expands to a comma-separated list
 * without a trailing comma.  Users place it first in the initializer so that
 * their own, later designators take precedence over these defaults (see
 * .has_llc in intel_device_info_byt).
 */
233 #define GFX7_FEATURES                               \
234    .ver = 7,                                        \
235    .has_hiz_and_separate_stencil = true,            \
236    .must_use_separate_stencil = true,               \
237    .has_llc = true,                                 \
238    .has_pln = true,                                 \
239    .has_64bit_float = true,                         \
240    .has_surface_tile_offset = true,                 \
241    .grf_size = 32,                                  \
242    .timestamp_frequency = 12500000,                 \
243    .max_constant_urb_size_kb = 16
244 
/*
 * Device description for INTEL_PLATFORM_IVB, gt 1: the GFX7_FEATURES
 * defaults plus one slice with one subslice of 6 EUs, 6 threads per EU and
 * 2 L3 banks.
 */
245 static const struct intel_device_info intel_device_info_ivb_gt1 = {
246    GFX7_FEATURES, .platform = INTEL_PLATFORM_IVB, .gt = 1,
247    .num_slices = 1,
248    .num_subslices = { 1, },
249    .max_eus_per_subslice = 6,
250    .num_thread_per_eu = 6,
251    .l3_banks = 2,
252    .max_vs_threads = 36,
253    .max_tcs_threads = 36,
254    .max_tes_threads = 36,
255    .max_gs_threads = 36,
256    .max_wm_threads = 48,
257    .max_cs_threads = 36,
258    .urb = {
259       .min_entries = {
260          [MESA_SHADER_VERTEX]    = 32,
261          [MESA_SHADER_TESS_EVAL] = 10,
262       },
263       .max_entries = {
264          [MESA_SHADER_VERTEX]    = 512,
265          [MESA_SHADER_TESS_CTRL] = 32,
266          [MESA_SHADER_TESS_EVAL] = 288,
267          [MESA_SHADER_GEOMETRY]  = 192,
268       },
269    },
270    .simulator_id = 7,
271 };
272 
/*
 * Device description for INTEL_PLATFORM_IVB, gt 2: the GFX7_FEATURES
 * defaults plus one slice with one subslice of 12 EUs, 8 threads per EU and
 * 4 L3 banks.
 */
273 static const struct intel_device_info intel_device_info_ivb_gt2 = {
274    GFX7_FEATURES, .platform = INTEL_PLATFORM_IVB, .gt = 2,
275    .num_slices = 1,
276    .num_subslices = { 1, },
277    .max_eus_per_subslice = 12,
278    .num_thread_per_eu = 8, /* Note: max_wm_threads (172) below is not a
                            * multiple of this value; the original author
                            * was not sure why. */
280    .l3_banks = 4,
281    .max_vs_threads = 128,
282    .max_tcs_threads = 128,
283    .max_tes_threads = 128,
284    .max_gs_threads = 128,
285    .max_wm_threads = 172,
286    .max_cs_threads = 64,
287    .urb = {
288       .min_entries = {
289          [MESA_SHADER_VERTEX]    = 32,
290          [MESA_SHADER_TESS_EVAL] = 10,
291       },
292       .max_entries = {
293          [MESA_SHADER_VERTEX]    = 704,
294          [MESA_SHADER_TESS_CTRL] = 64,
295          [MESA_SHADER_TESS_EVAL] = 448,
296          [MESA_SHADER_GEOMETRY]  = 320,
297       },
298    },
299    .simulator_id = 7,
300 };
301 
/*
 * Device description for INTEL_PLATFORM_BYT, gt 1: the GFX7_FEATURES
 * defaults plus one slice with one subslice of 4 EUs, 8 threads per EU and
 * 1 L3 bank.
 *
 * .has_llc = false comes after GFX7_FEATURES and therefore replaces the
 * "true" that the macro supplies.
 */
302 static const struct intel_device_info intel_device_info_byt = {
303    GFX7_FEATURES, .platform = INTEL_PLATFORM_BYT, .gt = 1,
304    .num_slices = 1,
305    .num_subslices = { 1, },
306    .max_eus_per_subslice = 4,
307    .num_thread_per_eu = 8,
308    .l3_banks = 1,
309    .has_llc = false,
310    .max_vs_threads = 36,
311    .max_tcs_threads = 36,
312    .max_tes_threads = 36,
313    .max_gs_threads = 36,
314    .max_wm_threads = 48,
315    .max_cs_threads = 32,
316    .urb = {
317       .min_entries = {
318          [MESA_SHADER_VERTEX]    = 32,
319          [MESA_SHADER_TESS_EVAL] = 10,
320       },
321       .max_entries = {
322          [MESA_SHADER_VERTEX]    = 512,
323          [MESA_SHADER_TESS_CTRL] = 32,
324          [MESA_SHADER_TESS_EVAL] = 288,
325          [MESA_SHADER_GEOMETRY]  = 192,
326       },
327    },
328    .simulator_id = 10,
329 };
330 
/*
 * Initializer fragment shared by the three HSW entries below: everything
 * from GFX7_FEATURES, plus the HSW platform id, verx10 = 75 and
 * supports_simd16_3src.  Like GFX7_FEATURES it has no trailing comma.
 */
331 #define HSW_FEATURES \
332    GFX7_FEATURES, \
333    .platform = INTEL_PLATFORM_HSW, \
334    .verx10 = 75, \
335    .supports_simd16_3src = true
336 
/*
 * Device description for INTEL_PLATFORM_HSW, gt 1: HSW_FEATURES plus one
 * slice with one subslice of 10 EUs, 7 threads per EU and 2 L3 banks.
 */
337 static const struct intel_device_info intel_device_info_hsw_gt1 = {
338    HSW_FEATURES, .gt = 1,
339    .num_slices = 1,
340    .num_subslices = { 1, },
341    .max_eus_per_subslice = 10,
342    .num_thread_per_eu = 7,
343    .l3_banks = 2,
344    .max_vs_threads = 70,
345    .max_tcs_threads = 70,
346    .max_tes_threads = 70,
347    .max_gs_threads = 70,
348    .max_wm_threads = 102,
349    .max_cs_threads = 70,
350    .urb = {
351       .min_entries = {
352          [MESA_SHADER_VERTEX]    = 32,
353          [MESA_SHADER_TESS_EVAL] = 10,
354       },
355       .max_entries = {
356          [MESA_SHADER_VERTEX]    = 640,
357          [MESA_SHADER_TESS_CTRL] = 64,
358          [MESA_SHADER_TESS_EVAL] = 384,
359          [MESA_SHADER_GEOMETRY]  = 256,
360       },
361    },
362    .simulator_id = 9,
363 };
364 
/*
 * Device description for INTEL_PLATFORM_HSW, gt 2: HSW_FEATURES plus one
 * slice with two subslices of 10 EUs, 7 threads per EU and 4 L3 banks.
 */
365 static const struct intel_device_info intel_device_info_hsw_gt2 = {
366    HSW_FEATURES, .gt = 2,
367    .num_slices = 1,
368    .num_subslices = { 2, },
369    .max_eus_per_subslice = 10,
370    .num_thread_per_eu = 7,
371    .l3_banks = 4,
372    .max_vs_threads = 280,
373    .max_tcs_threads = 256,
374    .max_tes_threads = 280,
375    .max_gs_threads = 256,
376    .max_wm_threads = 204,
377    .max_cs_threads = 70,
378    .urb = {
379       .min_entries = {
380          [MESA_SHADER_VERTEX]    = 64,
381          [MESA_SHADER_TESS_EVAL] = 10,
382       },
383       .max_entries = {
384          [MESA_SHADER_VERTEX]    = 1664,
385          [MESA_SHADER_TESS_CTRL] = 128,
386          [MESA_SHADER_TESS_EVAL] = 960,
387          [MESA_SHADER_GEOMETRY]  = 640,
388       },
389    },
390    .simulator_id = 9,
391 };
392 
/*
 * Device description for INTEL_PLATFORM_HSW, gt 3: HSW_FEATURES plus two
 * slices of two subslices each, 10 EUs per subslice, 7 threads per EU and
 * 8 L3 banks.  Thread limits other than max_wm_threads and the URB entry
 * limits are the same values as in intel_device_info_hsw_gt2.
 *
 * .max_constant_urb_size_kb = 32 comes after HSW_FEATURES and so replaces
 * the 16 inherited from GFX7_FEATURES.
 */
393 static const struct intel_device_info intel_device_info_hsw_gt3 = {
394    HSW_FEATURES, .gt = 3,
395    .num_slices = 2,
396    .num_subslices = { 2, 2, },
397    .max_eus_per_subslice = 10,
398    .num_thread_per_eu = 7,
399    .l3_banks = 8,
400    .max_vs_threads = 280,
401    .max_tcs_threads = 256,
402    .max_tes_threads = 280,
403    .max_gs_threads = 256,
404    .max_wm_threads = 408,
405    .max_cs_threads = 70,
406    .urb = {
407       .min_entries = {
408          [MESA_SHADER_VERTEX]    = 64,
409          [MESA_SHADER_TESS_EVAL] = 10,
410       },
411       .max_entries = {
412          [MESA_SHADER_VERTEX]    = 1664,
413          [MESA_SHADER_TESS_CTRL] = 128,
414          [MESA_SHADER_TESS_EVAL] = 960,
415          [MESA_SHADER_GEOMETRY]  = 640,
416       },
417    },
418    .max_constant_urb_size_kb = 32,
419    .simulator_id = 9,
420 };
421 
422 /* It's unclear how well supported sampling from the hiz buffer is on GFX8,
423  * so keep things conservative for now and set has_sample_with_hiz = false.
424  */
/* Initializer fragment with the ver 8 defaults.  Besides the BDW and CHV
 * entries it is also the first element of the GFX9 and GFX11 feature macros
 * further down, which override individual members (.ver, thread limits,
 * feature flags) with later designators.  It has no trailing comma.
 */
425 #define GFX8_FEATURES                               \
426    .ver = 8,                                        \
427    .has_hiz_and_separate_stencil = true,            \
428    .must_use_separate_stencil = true,               \
429    .has_llc = true,                                 \
430    .has_sample_with_hiz = false,                    \
431    .has_pln = true,                                 \
432    .has_integer_dword_mul = true,                   \
433    .has_64bit_float = true,                         \
434    .has_64bit_int = true,                           \
435    .supports_simd16_3src = true,                    \
436    .has_surface_tile_offset = true,                 \
437    .num_thread_per_eu = 7,                          \
438    .grf_size = 32,                                  \
439    .max_vs_threads = 504,                           \
440    .max_tcs_threads = 504,                          \
441    .max_tes_threads = 504,                          \
442    .max_gs_threads = 504,                           \
443    .max_wm_threads = 384,                           \
444    .max_threads_per_psd = 64,                       \
445    .timestamp_frequency = 12500000,                 \
446    .max_constant_urb_size_kb = 32
447 
/*
 * Device description for INTEL_PLATFORM_BDW, gt 1: GFX8_FEATURES plus one
 * slice with two subslices of 6 EUs and 2 L3 banks.  The geometry-stage
 * URB entry maximum (690) is lower than the 960 used by the gt 2 and gt 3
 * entries; see the inline comment.
 */
448 static const struct intel_device_info intel_device_info_bdw_gt1 = {
449    GFX8_FEATURES, .gt = 1,
450    .platform = INTEL_PLATFORM_BDW,
451    .num_slices = 1,
452    .num_subslices = { 2, },
453    .max_eus_per_subslice = 6,
454    .l3_banks = 2,
455    .max_cs_threads = 42,
456    .urb = {
457       .min_entries = {
458          [MESA_SHADER_VERTEX]    = 64,
459          [MESA_SHADER_TESS_EVAL] = 34,
460       },
461       .max_entries = {
462          [MESA_SHADER_VERTEX]    = 2560,
463          [MESA_SHADER_TESS_CTRL] = 504,
464          [MESA_SHADER_TESS_EVAL] = 1536,
465          /* Reduced from 960, seems to be similar to the bug on Gfx9 GT1. */
466          [MESA_SHADER_GEOMETRY]  = 690,
467       },
468    },
469    .simulator_id = 11,
470 };
471 
/*
 * Device description for INTEL_PLATFORM_BDW, gt 2: GFX8_FEATURES plus one
 * slice with three subslices of 8 EUs and 4 L3 banks.
 */
472 static const struct intel_device_info intel_device_info_bdw_gt2 = {
473    GFX8_FEATURES, .gt = 2,
474    .platform = INTEL_PLATFORM_BDW,
475    .num_slices = 1,
476    .num_subslices = { 3, },
477    .max_eus_per_subslice = 8,
478    .l3_banks = 4,
479    .max_cs_threads = 56,
480    .urb = {
481       .min_entries = {
482          [MESA_SHADER_VERTEX]    = 64,
483          [MESA_SHADER_TESS_EVAL] = 34,
484       },
485       .max_entries = {
486          [MESA_SHADER_VERTEX]    = 2560,
487          [MESA_SHADER_TESS_CTRL] = 504,
488          [MESA_SHADER_TESS_EVAL] = 1536,
489          [MESA_SHADER_GEOMETRY]  = 960,
490       },
491    },
492    .simulator_id = 11,
493 };
494 
/*
 * Device description for INTEL_PLATFORM_BDW, gt 3: GFX8_FEATURES plus two
 * slices of three subslices each, 8 EUs per subslice and 8 L3 banks.  The
 * URB limits and max_cs_threads are the same values as in the gt 2 entry.
 */
495 static const struct intel_device_info intel_device_info_bdw_gt3 = {
496    GFX8_FEATURES, .gt = 3,
497    .platform = INTEL_PLATFORM_BDW,
498    .num_slices = 2,
499    .num_subslices = { 3, 3, },
500    .max_eus_per_subslice = 8,
501    .l3_banks = 8,
502    .max_cs_threads = 56,
503    .urb = {
504       .min_entries = {
505          [MESA_SHADER_VERTEX]    = 64,
506          [MESA_SHADER_TESS_EVAL] = 34,
507       },
508       .max_entries = {
509          [MESA_SHADER_VERTEX]    = 2560,
510          [MESA_SHADER_TESS_CTRL] = 504,
511          [MESA_SHADER_TESS_EVAL] = 1536,
512          [MESA_SHADER_GEOMETRY]  = 960,
513       },
514    },
515    .simulator_id = 11,
516 };
517 
/*
 * Device description for INTEL_PLATFORM_CHV, gt 1: one slice with two
 * subslices of 8 EUs and 2 L3 banks.
 *
 * Several designators here come after GFX8_FEATURES and replace its
 * defaults: has_llc and has_integer_dword_mul become false, and the
 * VS/TCS/TES/GS/WM thread limits drop from 504/384 to 80/128.
 * max_cs_threads is written as 6 * 7.
 */
518 static const struct intel_device_info intel_device_info_chv = {
519    GFX8_FEATURES, .platform = INTEL_PLATFORM_CHV, .gt = 1,
520    .has_llc = false,
521    .has_integer_dword_mul = false,
522    .num_slices = 1,
523    .num_subslices = { 2, },
524    .max_eus_per_subslice = 8,
525    .l3_banks = 2,
526    .max_vs_threads = 80,
527    .max_tcs_threads = 80,
528    .max_tes_threads = 80,
529    .max_gs_threads = 80,
530    .max_wm_threads = 128,
531    .max_cs_threads = 6 * 7,
532    .urb = {
533       .min_entries = {
534          [MESA_SHADER_VERTEX]    = 34,
535          [MESA_SHADER_TESS_EVAL] = 34,
536       },
537       .max_entries = {
538          [MESA_SHADER_VERTEX]    = 640,
539          [MESA_SHADER_TESS_CTRL] = 80,
540          [MESA_SHADER_TESS_EVAL] = 384,
541          [MESA_SHADER_GEOMETRY]  = 256,
542       },
543    },
544    .simulator_id = 13,
545 };
546 
/*
 * Initializer fragment with the ver 9 hardware limits.  GFX9_FEATURES and
 * GFX9_LP_FEATURES expand it directly after GFX8_FEATURES, so the .ver,
 * thread-limit and timestamp_frequency designators here replace the ver 8
 * defaults, and .urb supplies the default URB entry limits.  It has no
 * trailing comma.
 */
547 #define GFX9_HW_INFO                                \
548    .ver = 9,                                        \
549    .max_vs_threads = 336,                           \
550    .max_gs_threads = 336,                           \
551    .max_tcs_threads = 336,                          \
552    .max_tes_threads = 336,                          \
553    .max_threads_per_psd = 64,                       \
554    .max_cs_threads = 56,                            \
555    .timestamp_frequency = 12000000,                 \
556    .urb = {                                         \
557       .min_entries = {                              \
558          [MESA_SHADER_VERTEX]    = 64,              \
559          [MESA_SHADER_TESS_EVAL] = 34,              \
560       },                                            \
561       .max_entries = {                              \
562          [MESA_SHADER_VERTEX]    = 1856,            \
563          [MESA_SHADER_TESS_CTRL] = 672,             \
564          [MESA_SHADER_TESS_EVAL] = 1120,            \
565          [MESA_SHADER_GEOMETRY]  = 640,             \
566       },                                            \
567    }
568 
/*
 * Initializer fragment for the ver 9 "LP" entries (BXT and GLK below).  It
 * starts from GFX8_FEATURES and GFX9_HW_INFO and then overrides them with
 * later designators: has_integer_dword_mul and has_llc become false,
 * has_sample_with_hiz becomes true, num_thread_per_eu drops from 7 to 6,
 * the VS/TCS/TES/GS limits drop from 336 to 112, max_cs_threads becomes
 * 6 * 6, timestamp_frequency becomes 19200000 and the whole .urb member is
 * replaced.  gt is fixed to 1 and num_slices to 1.  No trailing comma.
 */
569 #define GFX9_LP_FEATURES                           \
570    GFX8_FEATURES,                                  \
571    GFX9_HW_INFO,                                   \
572    .has_integer_dword_mul = false,                 \
573    .gt = 1,                                        \
574    .has_llc = false,                               \
575    .has_sample_with_hiz = true,                    \
576    .has_illegal_ccs_values = true,                 \
577    .num_slices = 1,                                \
578    .num_thread_per_eu = 6,                         \
579    .max_vs_threads = 112,                          \
580    .max_tcs_threads = 112,                         \
581    .max_tes_threads = 112,                         \
582    .max_gs_threads = 112,                          \
583    .max_cs_threads = 6 * 6,                        \
584    .timestamp_frequency = 19200000,                \
585    .urb = {                                        \
586       .min_entries = {                             \
587          [MESA_SHADER_VERTEX]    = 34,             \
588          [MESA_SHADER_TESS_EVAL] = 34,             \
589       },                                           \
590       .max_entries = {                             \
591          [MESA_SHADER_VERTEX]    = 704,            \
592          [MESA_SHADER_TESS_CTRL] = 256,            \
593          [MESA_SHADER_TESS_EVAL] = 416,            \
594          [MESA_SHADER_GEOMETRY]  = 256,            \
595       },                                           \
596    }
597 
/*
 * GFX9_LP_FEATURES for the 3x6 layout: three subslices in the single slice,
 * 6 EUs per subslice.  All limits stay at the GFX9_LP_FEATURES values.
 */
598 #define GFX9_LP_FEATURES_3X6                       \
599    GFX9_LP_FEATURES,                               \
600    .num_subslices = { 3, },                        \
601    .max_eus_per_subslice = 6
602 
/*
 * GFX9_LP_FEATURES for the 2x6 layout: two subslices in the single slice,
 * 6 EUs per subslice.  The VS/TCS/TES/GS thread limits (56) and every
 * urb.max_entries value are exactly half of the GFX9_LP_FEATURES ones; the
 * .urb designator replaces the whole member set by GFX9_LP_FEATURES.
 * .max_cs_threads = 6 * 6 repeats the value GFX9_LP_FEATURES already sets.
 */
603 #define GFX9_LP_FEATURES_2X6                       \
604    GFX9_LP_FEATURES,                               \
605    .num_subslices = { 2, },                        \
606    .max_eus_per_subslice = 6,                       \
607    .max_vs_threads = 56,                           \
608    .max_tcs_threads = 56,                          \
609    .max_tes_threads = 56,                          \
610    .max_gs_threads = 56,                           \
611    .max_cs_threads = 6 * 6,                        \
612    .urb = {                                        \
613       .min_entries = {                             \
614          [MESA_SHADER_VERTEX]    = 34,             \
615          [MESA_SHADER_TESS_EVAL] = 34,             \
616       },                                           \
617       .max_entries = {                             \
618          [MESA_SHADER_VERTEX]    = 352,            \
619          [MESA_SHADER_TESS_CTRL] = 128,            \
620          [MESA_SHADER_TESS_EVAL] = 208,            \
621          [MESA_SHADER_GEOMETRY]  = 128,            \
622       },                                           \
623    }
624 
/*
 * Initializer fragment used by the SKL, KBL and CFL entries below:
 * GFX8_FEATURES, then GFX9_HW_INFO, then has_sample_with_hiz switched to
 * true (GFX8_FEATURES sets it to false), has_illegal_ccs_values, and three
 * cooperative-matrix configurations.  It has no trailing comma.
 *
 * NOTE(review): the cooperative_matrix_configurations rows are positional;
 * presumably scope, three matrix dimensions and four element types, but the
 * member order is defined in intel_device_info.h -- confirm there.
 */
625 #define GFX9_FEATURES                               \
626    GFX8_FEATURES,                                   \
627    GFX9_HW_INFO,                                    \
628    .has_sample_with_hiz = true,                     \
629    .has_illegal_ccs_values = true,                                    \
630    .cooperative_matrix_configurations = {                             \
631     { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
632     { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 },       \
633     { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 },       \
634    }
635 
/*
 * Device description for INTEL_PLATFORM_SKL, gt 1: GFX9_FEATURES plus one
 * slice with two subslices of 6 EUs and 2 L3 banks.
 *
 * The .urb.max_entries[MESA_SHADER_VERTEX] designator comes after
 * GFX9_FEATURES and replaces only that one element (1856 in GFX9_HW_INFO)
 * with 928; the other URB limits from the macro are kept.
 */
636 static const struct intel_device_info intel_device_info_skl_gt1 = {
637    GFX9_FEATURES, .gt = 1,
638    .platform = INTEL_PLATFORM_SKL,
639    .num_slices = 1,
640    .num_subslices = { 2, },
641    .max_eus_per_subslice = 6,
642    .l3_banks = 2,
643    /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
644     * leading to some vertices to go missing if we use too much URB.
645     */
646    .urb.max_entries[MESA_SHADER_VERTEX] = 928,
647    .simulator_id = 12,
648 };
649 
/*
 * Device description for INTEL_PLATFORM_SKL, gt 2: GFX9_FEATURES unchanged,
 * plus one slice with three subslices of 8 EUs and 4 L3 banks.
 */
650 static const struct intel_device_info intel_device_info_skl_gt2 = {
651    GFX9_FEATURES, .gt = 2,
652    .platform = INTEL_PLATFORM_SKL,
653    .num_slices = 1,
654    .num_subslices = { 3, },
655    .max_eus_per_subslice = 8,
656    .l3_banks = 4,
657    .simulator_id = 12,
658 };
659 
/*
 * Device description for INTEL_PLATFORM_SKL, gt 3: GFX9_FEATURES unchanged,
 * plus two slices of three subslices each, 8 EUs per subslice and 8 L3
 * banks.
 */
660 static const struct intel_device_info intel_device_info_skl_gt3 = {
661    GFX9_FEATURES, .gt = 3,
662    .platform = INTEL_PLATFORM_SKL,
663    .num_slices = 2,
664    .num_subslices = { 3, 3, },
665    .max_eus_per_subslice = 8,
666    .l3_banks = 8,
667    .simulator_id = 12,
668 };
669 
/*
 * Device description for INTEL_PLATFORM_SKL, gt 4: GFX9_FEATURES unchanged,
 * plus three slices of three subslices each, 8 EUs per subslice and 12 L3
 * banks.
 *
 * NOTE(review): the quoted URB restriction below is not followed by any
 * URB-size designator in this initializer; it appears to be informational
 * only -- confirm where (or whether) the 1008KB limit is applied.
 */
670 static const struct intel_device_info intel_device_info_skl_gt4 = {
671    GFX9_FEATURES, .gt = 4,
672    .platform = INTEL_PLATFORM_SKL,
673    .num_slices = 3,
674    .num_subslices = { 3, 3, 3, },
675    .max_eus_per_subslice = 8,
676    .l3_banks = 12,
677    /* From the "L3 Allocation and Programming" documentation:
678     *
679     * "URB is limited to 1008KB due to programming restrictions.  This is not a
680     * restriction of the L3 implementation, but of the FF and other clients.
681     * Therefore, in a GT4 implementation it is possible for the programmed
682     * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but
683     * only 1008KB of this will be used."
684     */
685    .simulator_id = 12,
686 };
687 
/*
 * Device description for INTEL_PLATFORM_BXT in the 3x6 layout
 * (GFX9_LP_FEATURES_3X6: three subslices of 6 EUs), with 2 L3 banks.
 */
688 static const struct intel_device_info intel_device_info_bxt = {
689    GFX9_LP_FEATURES_3X6,
690    .platform = INTEL_PLATFORM_BXT,
691    .l3_banks = 2,
692    .simulator_id = 14,
693 };
694 
/*
 * Device description for INTEL_PLATFORM_BXT in the 2x6 layout
 * (GFX9_LP_FEATURES_2X6: two subslices of 6 EUs), with 1 L3 bank.
 */
695 static const struct intel_device_info intel_device_info_bxt_2x6 = {
696    GFX9_LP_FEATURES_2X6,
697    .platform = INTEL_PLATFORM_BXT,
698    .l3_banks = 1,
699    .simulator_id = 14,
700 };
701 /*
702  * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+.
703  * There's no KBL entry. Using the default SKL (GFX9) GS entries value.
704  */
705 
/*
 * Device description for INTEL_PLATFORM_KBL, gt 1: one slice with two
 * subslices of 6 EUs and 2 L3 banks.
 *
 * Three designators after GFX9_FEATURES replace values from GFX9_HW_INFO:
 * max_cs_threads (56 -> 7 * 6), the vertex URB entry maximum (1856 -> 928)
 * and the geometry URB entry maximum (640 -> 256).
 */
706 static const struct intel_device_info intel_device_info_kbl_gt1 = {
707    GFX9_FEATURES,
708    .platform = INTEL_PLATFORM_KBL,
709    .gt = 1,
710 
711    .max_cs_threads = 7 * 6,
712    .num_slices = 1,
713    .num_subslices = { 2, },
714    .max_eus_per_subslice = 6,
715    .l3_banks = 2,
716    /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
717     * leading to some vertices to go missing if we use too much URB.
718     */
719    .urb.max_entries[MESA_SHADER_VERTEX] = 928,
720    .urb.max_entries[MESA_SHADER_GEOMETRY] = 256,
721    .simulator_id = 16,
722 };
723 
/*
 * Device description for the INTEL_PLATFORM_KBL "gt1_5" variant: .gt is 1,
 * like intel_device_info_kbl_gt1, but the slice has three subslices of
 * 6 EUs and 4 L3 banks.  max_cs_threads (7 * 6) replaces the 56 from
 * GFX9_HW_INFO; the URB limits from the macro are left unchanged.
 */
724 static const struct intel_device_info intel_device_info_kbl_gt1_5 = {
725    GFX9_FEATURES,
726    .platform = INTEL_PLATFORM_KBL,
727    .gt = 1,
728 
729    .max_cs_threads = 7 * 6,
730    .num_slices = 1,
731    .num_subslices = { 3, },
732    .max_eus_per_subslice = 6,
733    .l3_banks = 4,
734    .simulator_id = 16,
735 };
736 
/*
 * Device description for INTEL_PLATFORM_KBL, gt 2: GFX9_FEATURES unchanged,
 * plus one slice with three subslices of 8 EUs and 4 L3 banks.
 */
737 static const struct intel_device_info intel_device_info_kbl_gt2 = {
738    GFX9_FEATURES,
739    .platform = INTEL_PLATFORM_KBL,
740    .gt = 2,
741 
742    .num_slices = 1,
743    .num_subslices = { 3, },
744    .max_eus_per_subslice = 8,
745    .l3_banks = 4,
746    .simulator_id = 16,
747 };
748 
/*
 * Device description for INTEL_PLATFORM_KBL, gt 3: GFX9_FEATURES unchanged,
 * plus two slices of three subslices each, 8 EUs per subslice and 8 L3
 * banks.
 */
749 static const struct intel_device_info intel_device_info_kbl_gt3 = {
750    GFX9_FEATURES,
751    .platform = INTEL_PLATFORM_KBL,
752    .gt = 3,
753 
754    .num_slices = 2,
755    .num_subslices = { 3, 3, },
756    .max_eus_per_subslice = 8,
757    .l3_banks = 8,
758    .simulator_id = 16,
759 };
760 
/*
 * Device description for INTEL_PLATFORM_KBL, gt 4: GFX9_FEATURES unchanged,
 * plus three slices of three subslices each, 8 EUs per subslice and 12 L3
 * banks.
 *
 * NOTE(review): as in intel_device_info_skl_gt4, the quoted URB restriction
 * is not followed by a URB-size designator in this initializer -- confirm
 * where the limit is applied.
 */
761 static const struct intel_device_info intel_device_info_kbl_gt4 = {
762    GFX9_FEATURES,
763    .platform = INTEL_PLATFORM_KBL,
764    .gt = 4,
765 
766    /*
767     * From the "L3 Allocation and Programming" documentation:
768     *
769     * "URB is limited to 1008KB due to programming restrictions.  This
770     *  is not a restriction of the L3 implementation, but of the FF and
771     *  other clients.  Therefore, in a GT4 implementation it is
772     *  possible for the programmed allocation of the L3 data array to
773     *  provide 3*384KB=1152KB for URB, but only 1008KB of this
774     *  will be used."
775     */
776    .num_slices = 3,
777    .num_subslices = { 3, 3, 3, },
778    .max_eus_per_subslice = 8,
779    .l3_banks = 12,
780    .simulator_id = 16,
781 };
782 
/*
 * Device description for INTEL_PLATFORM_GLK in the 3x6 layout
 * (GFX9_LP_FEATURES_3X6: three subslices of 6 EUs), with 2 L3 banks.
 */
783 static const struct intel_device_info intel_device_info_glk = {
784    GFX9_LP_FEATURES_3X6,
785    .platform = INTEL_PLATFORM_GLK,
786    .l3_banks = 2,
787    .simulator_id = 17,
788 };
789 
/*
 * Device description for INTEL_PLATFORM_GLK in the 2x6 layout
 * (GFX9_LP_FEATURES_2X6: two subslices of 6 EUs), with 2 L3 banks.
 *
 * NOTE(review): intel_device_info_bxt_2x6 uses l3_banks = 1 for the same
 * 2x6 layout; confirm that 2 is the intended value here.
 */
790 static const struct intel_device_info intel_device_info_glk_2x6 = {
791    GFX9_LP_FEATURES_2X6,
792    .platform = INTEL_PLATFORM_GLK,
793    .l3_banks = 2,
794    .simulator_id = 17,
795 };
796 
/*
 * Device description for INTEL_PLATFORM_CFL, gt 1: one slice with two
 * subslices of 6 EUs and 2 L3 banks.
 *
 * The two .urb.max_entries designators come after GFX9_FEATURES and replace
 * the vertex (1856 -> 928) and geometry (640 -> 256) maxima from
 * GFX9_HW_INFO; the remaining URB limits are kept.
 */
797 static const struct intel_device_info intel_device_info_cfl_gt1 = {
798    GFX9_FEATURES,
799    .platform = INTEL_PLATFORM_CFL,
800    .gt = 1,
801 
802    .num_slices = 1,
803    .num_subslices = { 2, },
804    .max_eus_per_subslice = 6,
805    .l3_banks = 2,
806    /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
807     * leading to some vertices to go missing if we use too much URB.
808     */
809    .urb.max_entries[MESA_SHADER_VERTEX] = 928,
810    .urb.max_entries[MESA_SHADER_GEOMETRY] = 256,
811    .simulator_id = 24,
812 };
/*
 * Device description for INTEL_PLATFORM_CFL, gt 2: GFX9_FEATURES unchanged,
 * plus one slice with three subslices of 8 EUs and 4 L3 banks.
 */
813 static const struct intel_device_info intel_device_info_cfl_gt2 = {
814    GFX9_FEATURES,
815    .platform = INTEL_PLATFORM_CFL,
816    .gt = 2,
817 
818    .num_slices = 1,
819    .num_subslices = { 3, },
820    .max_eus_per_subslice = 8,
821    .l3_banks = 4,
822    .simulator_id = 24,
823 };
824 
/*
 * Device description for INTEL_PLATFORM_CFL, gt 3: GFX9_FEATURES unchanged,
 * plus two slices of three subslices each, 8 EUs per subslice and 8 L3
 * banks.
 */
825 static const struct intel_device_info intel_device_info_cfl_gt3 = {
826    GFX9_FEATURES,
827    .platform = INTEL_PLATFORM_CFL,
828    .gt = 3,
829 
830    .num_slices = 2,
831    .num_subslices = { 3, 3, },
832    .max_eus_per_subslice = 8,
833    .l3_banks = 8,
834    .simulator_id = 24,
835 };
836 
/*
 * Wrap a comma-separated list of values in a brace-enclosed initializer
 * list, e.g. subslices(3, 3) expands to { 3, 3, }.  The braces let the list
 * be passed as a single macro argument, as the GFX11_FEATURES() users do
 * for its _subslices parameter (which initializes .num_subslices).
 *
 * Uses the ISO C __VA_ARGS__ form rather than the GNU-only named variadic
 * parameter; at least one argument must be supplied.
 */
#define subslices(...) { __VA_ARGS__, }
838 
/*
 * Initializer fragment with the ver 11 hardware limits.  GFX11_FEATURES()
 * expands it directly after GFX8_FEATURES, so .ver, .has_pln (true -> false)
 * and the VS/GS/TCS/TES thread limits here replace the ver 8 defaults.
 * It has no trailing comma.
 */
839 #define GFX11_HW_INFO                               \
840    .ver = 11,                                       \
841    .has_pln = false,                                \
842    .max_vs_threads = 364,                           \
843    .max_gs_threads = 224,                           \
844    .max_tcs_threads = 224,                          \
845    .max_tes_threads = 364,                          \
846    .max_threads_per_psd = 64,                       \
847    .max_cs_threads = 56
848 
/*
 * Build the common part of a ver 11 initializer.
 *
 *   _gt         value for .gt
 *   _slices     value for .num_slices
 *   _subslices  braced initializer for .num_subslices (see subslices())
 *   _l3         value for .l3_banks
 *   _platform   value for .platform
 *
 * Starts from GFX8_FEATURES and GFX11_HW_INFO, then turns off
 * has_64bit_float, has_64bit_int and has_integer_dword_mul (all true in
 * GFX8_FEATURES) and defaults max_eus_per_subslice to 8, which some EHL
 * entries override afterwards.  It has no trailing comma.
 *
 * NOTE(review): the cooperative_matrix_configurations rows are positional;
 * their member order is defined in intel_device_info.h -- confirm there.
 */
849 #define GFX11_FEATURES(_gt, _slices, _subslices, _l3, _platform)  \
850    GFX8_FEATURES,                                     \
851    GFX11_HW_INFO,                                     \
852    .platform = _platform,                             \
853    .has_64bit_float = false,                          \
854    .has_64bit_int = false,                            \
855    .has_integer_dword_mul = false,                    \
856    .has_sample_with_hiz = false,                      \
857    .has_illegal_ccs_values = true,                    \
858    .gt = _gt, .num_slices = _slices, .l3_banks = _l3, \
859    .num_subslices = _subslices,                       \
860    .max_eus_per_subslice = 8,                                         \
861    .cooperative_matrix_configurations = {                             \
862     { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
863     { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 },       \
864     { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 },       \
865    }
866 
/*
 * URB entry limits shared by the ver 11 entries.  Unlike the other
 * fragments this one is meant to be expanded inside a ".urb = { ... }"
 * initializer, since it designates .min_entries and .max_entries directly.
 * It has no trailing comma.
 */
867 #define GFX11_URB_MIN_MAX_ENTRIES                     \
868    .min_entries = {                                   \
869       [MESA_SHADER_VERTEX]    = 64,                   \
870       [MESA_SHADER_TESS_EVAL] = 34,                   \
871    },                                                 \
872    .max_entries = {                                   \
873       [MESA_SHADER_VERTEX]    = 2384,                 \
874       [MESA_SHADER_TESS_CTRL] = 1032,                 \
875       [MESA_SHADER_TESS_EVAL] = 2384,                 \
876       [MESA_SHADER_GEOMETRY]  = 1032,                 \
877    }
878 
/*
 * Device description for INTEL_PLATFORM_ICL, gt 2: one slice with 8
 * subslices and 8 L3 banks, using the shared ver 11 URB entry limits.
 */
879 static const struct intel_device_info intel_device_info_icl_gt2 = {
880    GFX11_FEATURES(2, 1, subslices(8), 8, INTEL_PLATFORM_ICL),
881    .urb = {
882       GFX11_URB_MIN_MAX_ENTRIES,
883    },
884    .simulator_id = 19,
885 };
886 
/*
 * Device description for the INTEL_PLATFORM_ICL "gt1_5" variant: .gt is 1,
 * one slice with 6 subslices and 6 L3 banks, using the shared ver 11 URB
 * entry limits.
 */
887 static const struct intel_device_info intel_device_info_icl_gt1_5 = {
888    GFX11_FEATURES(1, 1, subslices(6), 6, INTEL_PLATFORM_ICL),
889    .urb = {
890       GFX11_URB_MIN_MAX_ENTRIES,
891    },
892    .simulator_id = 19,
893 };
894 
/*
 * Device description for INTEL_PLATFORM_ICL, gt 1: one slice with 4
 * subslices and 6 L3 banks, using the shared ver 11 URB entry limits.
 */
895 static const struct intel_device_info intel_device_info_icl_gt1 = {
896    GFX11_FEATURES(1, 1, subslices(4), 6, INTEL_PLATFORM_ICL),
897    .urb = {
898       GFX11_URB_MIN_MAX_ENTRIES,
899    },
900    .simulator_id = 19,
901 };
902 
/*
 * Device description for the INTEL_PLATFORM_ICL "gt0_5" variant: .gt is 1,
 * one slice with a single subslice and 6 L3 banks, using the shared ver 11
 * URB entry limits.
 */
903 static const struct intel_device_info intel_device_info_icl_gt0_5 = {
904    GFX11_FEATURES(1, 1, subslices(1), 6, INTEL_PLATFORM_ICL),
905    .urb = {
906       GFX11_URB_MIN_MAX_ENTRIES,
907    },
908    .simulator_id = 19,
909 };
910 
/* Initializers shared by the EHL device descriptions below: the Gfx11 URB
 * entry limits, two CCS-related flags, and simulator ID 28. Intended to
 * follow GFX11_FEATURES(...) inside a struct intel_device_info initializer.
 */
#define GFX11_LP_FEATURES                           \
   .urb = {                                         \
      GFX11_URB_MIN_MAX_ENTRIES,                    \
   },                                               \
   .disable_ccs_repack = true,                      \
   .has_illegal_ccs_values = true,                  \
   .simulator_id = 28
918 
919 static const struct intel_device_info intel_device_info_ehl_4x8 = {
920    GFX11_FEATURES(1, 1, subslices(4), 4, INTEL_PLATFORM_EHL),
921    GFX11_LP_FEATURES,
922 };
923 
924 static const struct intel_device_info intel_device_info_ehl_4x6 = {
925    GFX11_FEATURES(1, 1, subslices(4), 4, INTEL_PLATFORM_EHL),
926    GFX11_LP_FEATURES,
927    .max_eus_per_subslice = 6,
928 };
929 
930 static const struct intel_device_info intel_device_info_ehl_4x5 = {
931    GFX11_FEATURES(1, 1, subslices(4), 4, INTEL_PLATFORM_EHL),
932    GFX11_LP_FEATURES,
933    .max_eus_per_subslice = 5,
934 };
935 
936 static const struct intel_device_info intel_device_info_ehl_4x4 = {
937    GFX11_FEATURES(1, 1, subslices(4), 4, INTEL_PLATFORM_EHL),
938    GFX11_LP_FEATURES,
939    .max_eus_per_subslice = 4,
940 };
941 
942 static const struct intel_device_info intel_device_info_ehl_2x8 = {
943    GFX11_FEATURES(1, 1, subslices(2), 4, INTEL_PLATFORM_EHL),
944    GFX11_LP_FEATURES,
945 };
946 
947 static const struct intel_device_info intel_device_info_ehl_2x4 = {
948    GFX11_FEATURES(1, 1, subslices(2), 4, INTEL_PLATFORM_EHL),
949    GFX11_LP_FEATURES,
950    .max_eus_per_subslice = 4,
951 };
952 
/* Hardware description shared by the device tables built with
 * GFX12_FEATURES: sets ver to 12, a few capability flags, the per-stage
 * thread limits, and the URB size and entry limits.
 */
#define GFX12_HW_INFO                               \
   .ver = 12,                                       \
   .has_pln = false,                                \
   .has_sample_with_hiz = false,                    \
   .has_aux_map = true,                             \
   .max_vs_threads = 546,                           \
   .max_gs_threads = 336,                           \
   .max_tcs_threads = 336,                          \
   .max_tes_threads = 546,                          \
   .max_threads_per_psd = 64,                       \
   .max_cs_threads = 112, /* threads per DSS */     \
   .urb = {                                         \
      .size = 512, /* For intel_stub_gpu */         \
      .min_entries = {                              \
         [MESA_SHADER_VERTEX]    = 64,              \
         [MESA_SHADER_TESS_EVAL] = 34,              \
      },                                            \
      .max_entries = {                              \
         [MESA_SHADER_VERTEX]    = 3576,            \
         [MESA_SHADER_TESS_CTRL] = 1548,            \
         [MESA_SHADER_TESS_EVAL] = 3576,            \
         [MESA_SHADER_GEOMETRY]  = 1548,            \
      },                                            \
   }
977 
/* Full initializer list for a Gfx12 device description.
 *
 * _gt, _slices and _l3 are stored in .gt, .num_slices and .l3_banks.
 *
 * Starts from GFX8_FEATURES (defined earlier in this file) and GFX12_HW_INFO,
 * then sets the remaining fields. Later designated initializers override
 * earlier ones, so anything listed here after those two macros takes
 * precedence, and a device table may in turn override fields after this macro.
 */
#define GFX12_FEATURES(_gt, _slices, _l3)                       \
   GFX8_FEATURES,                                               \
   GFX12_HW_INFO,                                               \
   .has_64bit_float = false,                                    \
   .has_64bit_int = false,                                      \
   .has_integer_dword_mul = false,                              \
   .gt = _gt, .num_slices = _slices, .l3_banks = _l3,           \
   .simulator_id = 22,                                          \
   .max_eus_per_subslice = 16,                                  \
   /* BSpec 45101 (r51017) */                                   \
   .pat = {                                                     \
         /* CPU: WB, GPU: PAT 0 => WB, 2WAY */                  \
         .cached_coherent = PAT_ENTRY(0, WB),                   \
         /* CPU: WC, GPU: PAT 1 => WC */                        \
         .scanout = PAT_ENTRY(1, WC),                           \
         /* CPU: WB, GPU: PAT 0 => WB, 2WAY */                  \
         .writeback_incoherent = PAT_ENTRY(0, WB),              \
         /* CPU: WC, GPU: PAT 1 => WC */                        \
         .writecombining = PAT_ENTRY(1, WC),                    \
   },                                                           \
   .cooperative_matrix_configurations = {                       \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 },       \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 },       \
   }
1003 
/* Wraps its arguments in a brace-enclosed initializer list; used below to
 * initialize .num_subslices. Uses the GNU named-variadic macro syntax.
 */
#define dual_subslices(args...) { args, }
1005 
/* GFX12_FEATURES with gt = 1, one slice and 4 L3 banks, plus a single
 * .num_subslices entry of 1.
 */
#define GFX12_GT05_FEATURES                                     \
   GFX12_FEATURES(1, 1, 4),                                     \
   .num_subslices = dual_subslices(1)
1009 
/* GFX12_FEATURES for a single-slice part selected by _gt: when _gt is 1 the
 * description gets 4 L3 banks and a .num_subslices entry of 2, otherwise
 * 8 L3 banks and an entry of 6.
 *
 * _gt is parenthesized in the comparisons so that an expression argument
 * cannot be regrouped by operator precedence.
 */
#define GFX12_GT_FEATURES(_gt)                                  \
   GFX12_FEATURES(_gt, 1, (_gt) == 1 ? 4 : 8),                  \
   .num_subslices = dual_subslices((_gt) == 1 ? 2 : 6)
1013 
1014 static const struct intel_device_info intel_device_info_tgl_gt1 = {
1015    GFX12_GT_FEATURES(1),
1016    .platform = INTEL_PLATFORM_TGL,
1017 };
1018 
1019 static const struct intel_device_info intel_device_info_tgl_gt2 = {
1020    GFX12_GT_FEATURES(2),
1021    .platform = INTEL_PLATFORM_TGL,
1022 };
1023 
1024 static const struct intel_device_info intel_device_info_rkl_gt05 = {
1025    GFX12_GT05_FEATURES,
1026    .platform = INTEL_PLATFORM_RKL,
1027 };
1028 
1029 static const struct intel_device_info intel_device_info_rkl_gt1 = {
1030    GFX12_GT_FEATURES(1),
1031    .platform = INTEL_PLATFORM_RKL,
1032 };
1033 
1034 static const struct intel_device_info intel_device_info_adl_gt05 = {
1035    GFX12_GT05_FEATURES,
1036    .platform = INTEL_PLATFORM_ADL,
1037 };
1038 
1039 static const struct intel_device_info intel_device_info_adl_gt1 = {
1040    GFX12_GT_FEATURES(1),
1041    .platform = INTEL_PLATFORM_ADL,
1042 };
1043 
1044 static const struct intel_device_info intel_device_info_adl_n = {
1045    GFX12_GT_FEATURES(1),
1046    .platform = INTEL_PLATFORM_ADL,
1047    .is_adl_n = true,
1048 };
1049 
1050 static const struct intel_device_info intel_device_info_adl_gt2 = {
1051    GFX12_GT_FEATURES(2),
1052    .platform = INTEL_PLATFORM_ADL,
1053 };
1054 
1055 static const struct intel_device_info intel_device_info_rpl = {
1056    GFX12_FEATURES(1, 1, 4),
1057    .num_subslices = dual_subslices(2),
1058    .platform = INTEL_PLATFORM_RPL,
1059 };
1060 
1061 static const struct intel_device_info intel_device_info_rpl_p = {
1062    GFX12_GT_FEATURES(2),
1063    .platform = INTEL_PLATFORM_RPL,
1064 };
1065 
/* Initializers shared by the DG1 and SG1 device descriptions: the GT2 Gfx12
 * feature set with the platform set to INTEL_PLATFORM_DG1, no LLC, local
 * memory, a URB size of 768 (overriding only the size set through
 * GFX12_HW_INFO), and simulator ID 30.
 */
#define GFX12_DG1_SG1_FEATURES                           \
   GFX12_GT_FEATURES(2),                                 \
   .platform = INTEL_PLATFORM_DG1,                       \
   .has_llc = false,                                     \
   .has_local_mem = true,                                \
   .urb.size = 768,                                      \
   .simulator_id = 30
1073 
1074 static const struct intel_device_info intel_device_info_dg1 = {
1075    GFX12_DG1_SG1_FEATURES,
1076 };
1077 
1078 static const struct intel_device_info intel_device_info_sg1 = {
1079    GFX12_DG1_SG1_FEATURES,
1080 };
1081 
/* Minimum and maximum URB entry counts per shader stage used by
 * XEHP_FEATURES. Expands to two designated initializers and is meant to be
 * placed inside a `.urb = { ... }` initializer.
 */
#define XEHP_URB_MIN_MAX_ENTRIES                        \
   .min_entries = {                                     \
      [MESA_SHADER_VERTEX]    = 64,                     \
      [MESA_SHADER_TESS_EVAL] = 34,                     \
   },                                                   \
   .max_entries = {                                     \
      [MESA_SHADER_VERTEX]    = 3832, /* BSpec 47138 */ \
      [MESA_SHADER_TESS_CTRL] = 1548, /* BSpec 47137 */ \
      [MESA_SHADER_TESS_EVAL] = 3576, /* BSpec 47135 */ \
      [MESA_SHADER_GEOMETRY]  = 1548, /* BSpec 47136 */ \
   }
1093 
/* Full initializer list for the verx10 = 125 device descriptions; also the
 * base that DG2_FEATURES, MTL_FEATURES and XE2_FEATURES extend and override.
 *
 * _gt, _slices and _l3 are stored in .gt, .num_slices and .l3_banks.
 *
 * Starts from GFX8_FEATURES (defined earlier in this file); every field
 * listed afterwards overrides the value that macro provides.
 */
#define XEHP_FEATURES(_gt, _slices, _l3)                        \
   GFX8_FEATURES,                                               \
   .needs_null_push_constant_tbimr_workaround = true,           \
   .has_64bit_float = false,                                    \
   .has_64bit_int = false,                                      \
   .has_integer_dword_mul = false,                              \
   .gt = _gt, .num_slices = _slices, .l3_banks = _l3,           \
   .num_subslices = dual_subslices(1), /* updated by topology */\
   .ver = 12,                                                   \
   .has_pln = false,                                            \
   .has_sample_with_hiz = false,                                \
   .max_vs_threads = 546,  /* BSpec 46312 */                    \
   .max_gs_threads = 336,  /* BSpec 46299 */                    \
   .max_tcs_threads = 336, /* BSpec 46300 */                    \
   .max_tes_threads = 546, /* BSpec 46298 */                    \
   .max_threads_per_psd = 64,                                   \
   .max_cs_threads = 112, /* threads per DSS */                 \
   .urb = {                                                     \
      .size = 768, /* For intel_stub_gpu */                     \
      XEHP_URB_MIN_MAX_ENTRIES,                                 \
   },                                                           \
   .num_thread_per_eu = 8 /* BSpec 44472 */,                    \
   .max_eus_per_subslice = 16,                                  \
   .verx10 = 125,                                               \
   .has_llc = false,                                            \
   .has_lsc = true,                                             \
   .has_local_mem = true,                                       \
   .has_aux_map = false,                                        \
   .simulator_id = 29,                                          \
   .cooperative_matrix_configurations = {                       \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 },       \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 8, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 },       \
   }
1128 
/* Initializers shared by the DG2 and ATSM device descriptions:
 * XEHP_FEATURES(0, 1, 0) plus a default revision, the coarse-pixel, mesh
 * shading, ray tracing and flat-CCS flags, and a PAT table.
 */
#define DG2_FEATURES                                            \
   /* (Sub)slice info comes from the kernel topology info */    \
   XEHP_FEATURES(0, 1, 0),                                      \
   .revision = 4, /* For offline compiler */                    \
   .has_coarse_pixel_primitive_and_cb = true,                   \
   .has_mesh_shading = true,                                    \
   .has_ray_tracing = true,                                     \
   .has_flat_ccs = true,                                        \
   /* There is no PAT table for DG2, using TGL ones */          \
   /* BSpec 45101 (r51017) */                                   \
   .pat = {                                                     \
         /* CPU: WB, GPU: PAT 0 => WB, 2WAY */                  \
         .cached_coherent = PAT_ENTRY(0, WB),                   \
         /* CPU: WC, GPU: PAT 1 => WC */                        \
         .scanout = PAT_ENTRY(1, WC),                           \
         /* CPU: WB, GPU: PAT 0 => WB, 2WAY */                  \
         .writeback_incoherent = PAT_ENTRY(0, WB),              \
         /* CPU: WC, GPU: PAT 1 => WC */                        \
         .writecombining = PAT_ENTRY(1, WC),                    \
   }
1149 
1150 static const struct intel_device_info intel_device_info_dg2_g10 = {
1151    DG2_FEATURES,
1152    .platform = INTEL_PLATFORM_DG2_G10,
1153 };
1154 
1155 static const struct intel_device_info intel_device_info_dg2_g11 = {
1156    DG2_FEATURES,
1157    .platform = INTEL_PLATFORM_DG2_G11,
1158 };
1159 
1160 static const struct intel_device_info intel_device_info_dg2_g12 = {
1161    DG2_FEATURES,
1162    .platform = INTEL_PLATFORM_DG2_G12,
1163 };
1164 
1165 static const struct intel_device_info intel_device_info_atsm_g10 = {
1166    DG2_FEATURES,
1167    .platform = INTEL_PLATFORM_ATSM_G10,
1168 };
1169 
1170 static const struct intel_device_info intel_device_info_atsm_g11 = {
1171    DG2_FEATURES,
1172    .platform = INTEL_PLATFORM_ATSM_G11,
1173 };
1174 
/* Initializers shared by the MTL and ARL device descriptions:
 * XEHP_FEATURES(0, 1, 0) with local memory turned off, the aux map and 64-bit
 * float (via the math pipe) turned on, the coarse-pixel, mesh shading and ray
 * tracing flags set, and its own PAT table.
 */
#define MTL_FEATURES                                            \
   /* (Sub)slice info comes from the kernel topology info */    \
   XEHP_FEATURES(0, 1, 0),                                      \
   .has_local_mem = false,                                      \
   .has_aux_map = true,                                         \
   .has_64bit_float = true,                                     \
   .has_64bit_float_via_math_pipe = true,                       \
   .has_integer_dword_mul = false,                              \
   .has_coarse_pixel_primitive_and_cb = true,                   \
   .has_mesh_shading = true,                                    \
   .has_ray_tracing = true,                                     \
   /* BSpec 45101 (r51017) */                                   \
   .pat = {                                                     \
         /* CPU: WB, GPU: PAT 3 => WB, 1WAY */                  \
         .cached_coherent = PAT_ENTRY(3, WB),                   \
         /* CPU: WC, GPU: PAT 1 => WC */                        \
         .scanout = PAT_ENTRY(1, WC),                           \
         /* CPU: WB, GPU: PAT 0 => WB, 0WAY */                  \
         .writeback_incoherent = PAT_ENTRY(0, WB),              \
         /* CPU: WC, GPU: PAT 1 => WC */                        \
         .writecombining = PAT_ENTRY(1, WC),                    \
   }
1197 
1198 static const struct intel_device_info intel_device_info_mtl_u = {
1199    MTL_FEATURES,
1200    .platform = INTEL_PLATFORM_MTL_U,
1201 };
1202 
1203 static const struct intel_device_info intel_device_info_mtl_h = {
1204    MTL_FEATURES,
1205    .platform = INTEL_PLATFORM_MTL_H,
1206 };
1207 
1208 static const struct intel_device_info intel_device_info_arl_u = {
1209    MTL_FEATURES,
1210    .platform = INTEL_PLATFORM_ARL_U,
1211 };
1212 
1213 static const struct intel_device_info intel_device_info_arl_h = {
1214    MTL_FEATURES,
1215    .platform = INTEL_PLATFORM_ARL_H,
1216 };
1217 
/* Initializers shared by the BMG and LNL device descriptions:
 * XEHP_FEATURES(0, 1, 0) with ver/verx10 raised to 20/200, a GRF size of 64,
 * the TBIMR null-push-constant workaround turned off, 64-bit float and int
 * turned on, and Xe2-specific PAT and cooperative-matrix tables.
 *
 * has_local_mem is left for each device description to set.
 */
#define XE2_FEATURES                                            \
   /* (Sub)slice info comes from the kernel topology info */    \
   XEHP_FEATURES(0, 1, 0),                                      \
   .ver = 20,                                                   \
   .verx10 = 200,                                               \
   .num_subslices = dual_subslices(1),                          \
   .grf_size = 64,                                              \
   .needs_null_push_constant_tbimr_workaround = false,          \
   .has_64bit_float = true,                                     \
   .has_64bit_int = true,                                       \
   .has_integer_dword_mul = false,                              \
   .has_coarse_pixel_primitive_and_cb = true,                   \
   .has_mesh_shading = true,                                    \
   .has_ray_tracing = true,                                     \
   .has_indirect_unroll = true,                                 \
   /* BSpec 71582 (r59285) */                                   \
   .pat = {                                                     \
      /* CPU: WB, GPU: PAT 1 => WB, 1WAY */                     \
      .cached_coherent = PAT_ENTRY(1, WB),                      \
      /* CPU: WC, GPU: PAT 6 => XD */                           \
      .scanout = PAT_ENTRY(6, WC),                              \
      /* CPU: WC, GPU: PAT 0 => WB */                           \
      .writecombining = PAT_ENTRY(0, WC),                       \
      /* CPU: WC, GPU: PAT 11 => XD, compressed */              \
      .compressed = PAT_ENTRY(11, WC)                           \
   },                                                           \
   .cooperative_matrix_configurations = {                       \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT16, INTEL_CMAT_FLOAT32, INTEL_CMAT_FLOAT32 }, \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 32, INTEL_CMAT_SINT8, INTEL_CMAT_SINT8, INTEL_CMAT_SINT32, INTEL_CMAT_SINT32 },       \
    { INTEL_CMAT_SCOPE_SUBGROUP, 8, 16, 32, INTEL_CMAT_UINT8, INTEL_CMAT_UINT8, INTEL_CMAT_UINT32, INTEL_CMAT_UINT32 },       \
   },                                                           \
   .has_flat_ccs = true
1250 
1251 static const struct intel_device_info intel_device_info_bmg = {
1252    XE2_FEATURES,
1253    .platform = INTEL_PLATFORM_BMG,
1254    .has_local_mem = true,
1255 };
1256 
1257 static const struct intel_device_info intel_device_info_lnl = {
1258    XE2_FEATURES,
1259    .platform = INTEL_PLATFORM_LNL,
1260    .has_local_mem = false,
1261 };
1262 
1263 void
intel_device_info_topology_reset_masks(struct intel_device_info * devinfo)1264 intel_device_info_topology_reset_masks(struct intel_device_info *devinfo)
1265 {
1266    devinfo->subslice_slice_stride = 0;
1267    devinfo->eu_subslice_stride = 0;
1268    devinfo->eu_slice_stride = 0;
1269 
1270    devinfo->num_slices = 0;
1271    memset(devinfo->num_subslices, 0, sizeof(devinfo->num_subslices));
1272 
1273    memset(&devinfo->slice_masks, 0, sizeof(devinfo->slice_masks));
1274    memset(devinfo->subslice_masks, 0, sizeof(devinfo->subslice_masks));
1275    memset(devinfo->eu_masks, 0, sizeof(devinfo->eu_masks));
1276    memset(devinfo->ppipe_subslices, 0, sizeof(devinfo->ppipe_subslices));
1277 }
1278 
1279 void
intel_device_info_topology_update_counts(struct intel_device_info * devinfo)1280 intel_device_info_topology_update_counts(struct intel_device_info *devinfo)
1281 {
1282    devinfo->num_slices = __builtin_popcount(devinfo->slice_masks);
1283    devinfo->subslice_total = 0;
1284    for (int s = 0; s < devinfo->max_slices; s++) {
1285       if (!intel_device_info_slice_available(devinfo, s))
1286          continue;
1287 
1288       for (int b = 0; b < devinfo->subslice_slice_stride; b++) {
1289          devinfo->num_subslices[s] +=
1290             __builtin_popcount(devinfo->subslice_masks[s * devinfo->subslice_slice_stride + b]);
1291       }
1292       devinfo->subslice_total += devinfo->num_subslices[s];
1293    }
1294    assert(devinfo->num_slices > 0);
1295    assert(devinfo->subslice_total > 0);
1296 }
1297 
1298 void
intel_device_info_update_pixel_pipes(struct intel_device_info * devinfo,uint8_t * subslice_masks)1299 intel_device_info_update_pixel_pipes(struct intel_device_info *devinfo, uint8_t *subslice_masks)
1300 {
1301    if (devinfo->ver < 11)
1302       return;
1303 
1304    /* The kernel only reports one slice on all existing ICL+ platforms, even
1305     * if multiple slices are present. The slice mask is allowed to have the
1306     * accurate value greater than 1 on gfx12.5+ platforms though, in order to
1307     * be tolerant with the behavior of our simulation environment.
1308     */
1309    assert(devinfo->slice_masks == 1 || devinfo->verx10 >= 125);
1310 
1311    /* Count the number of subslices on each pixel pipe. Assume that every
1312     * contiguous group of 4 subslices in the mask belong to the same pixel
1313     * pipe. However note that on TGL+ the kernel returns a mask of enabled
1314     * *dual* subslices instead of actual subslices somewhat confusingly, so
1315     * each pixel pipe only takes 2 bits in the mask even though it's still 4
1316     * subslices.
1317     */
1318    const unsigned ppipe_bits = devinfo->ver >= 12 ? 2 : 4;
1319    for (unsigned p = 0; p < INTEL_DEVICE_MAX_PIXEL_PIPES; p++) {
1320       const unsigned offset = p * ppipe_bits;
1321       const unsigned subslice_idx = offset /
1322          devinfo->max_subslices_per_slice * devinfo->subslice_slice_stride;
1323       const unsigned ppipe_mask =
1324          BITFIELD_RANGE(offset % devinfo->max_subslices_per_slice, ppipe_bits);
1325 
1326       if (subslice_idx < ARRAY_SIZE(devinfo->subslice_masks))
1327          devinfo->ppipe_subslices[p] =
1328             __builtin_popcount(subslice_masks[subslice_idx] & ppipe_mask);
1329       else
1330          devinfo->ppipe_subslices[p] = 0;
1331    }
1332 }
1333 
1334 void
intel_device_info_update_l3_banks(struct intel_device_info * devinfo)1335 intel_device_info_update_l3_banks(struct intel_device_info *devinfo)
1336 {
1337    if (devinfo->ver != 12)
1338       return;
1339 
1340    if (devinfo->verx10 >= 125) {
1341       if (devinfo->subslice_total > 16) {
1342          assert(devinfo->subslice_total <= 32);
1343          devinfo->l3_banks = 32;
1344       } else if (devinfo->subslice_total > 8) {
1345          devinfo->l3_banks = 16;
1346       } else {
1347          devinfo->l3_banks = 8;
1348       }
1349    } else {
1350       assert(devinfo->num_slices == 1);
1351       if (devinfo->subslice_total >= 6) {
1352          assert(devinfo->subslice_total == 6);
1353          devinfo->l3_banks = 8;
1354       } else if (devinfo->subslice_total > 2) {
1355          devinfo->l3_banks = 6;
1356       } else {
1357          devinfo->l3_banks = 4;
1358       }
1359    }
1360 }
1361 
1362 /* Returns the number of EUs of the first subslice enabled */
1363 uint32_t
intel_device_info_get_eu_count_first_subslice(const struct intel_device_info * devinfo)1364 intel_device_info_get_eu_count_first_subslice(const struct intel_device_info *devinfo)
1365 {
1366    uint32_t first_subslice, first_slice, offset, i;
1367    uint32_t eu_count = 0;
1368 
1369    first_slice = ffs(devinfo->slice_masks);
1370    first_slice--;
1371    offset = first_slice * devinfo->subslice_slice_stride;
1372 
1373    for (i = 0; i < DIV_ROUND_UP(devinfo->max_subslices_per_slice, 8); i++) {
1374       first_subslice = ffs(devinfo->subslice_masks[offset + i]);
1375 
1376       if (first_subslice == 0)
1377          continue;
1378 
1379       break;
1380    }
1381 
1382    assert(first_subslice > 0);
1383    first_subslice--;
1384    offset = first_slice * devinfo->eu_slice_stride +
1385             first_subslice * devinfo->eu_subslice_stride;
1386    for (i = 0; i < DIV_ROUND_UP(devinfo->max_eus_per_subslice, 8); i++)
1387       eu_count += __builtin_popcount(devinfo->eu_masks[offset + i]);
1388 
1389    assert(eu_count > 0);
1390    return eu_count;
1391 }
1392 
1393 /* Generate mask from the device data. */
1394 static void
fill_masks(struct intel_device_info * devinfo)1395 fill_masks(struct intel_device_info *devinfo)
1396 {
1397    /* All of our internal device descriptions assign the same number of
1398     * subslices for each slice. Just verify that this is true.
1399     */
1400    for (int s = 1; s < devinfo->num_slices; s++)
1401       assert(devinfo->num_subslices[0] == devinfo->num_subslices[s]);
1402 
1403    intel_device_info_i915_update_from_masks(devinfo,
1404                           (1U << devinfo->num_slices) - 1,
1405                           (1U << devinfo->num_subslices[0]) - 1,
1406                           devinfo->num_slices * devinfo->num_subslices[0] *
1407                           devinfo->max_eus_per_subslice);
1408 }
1409 
1410 void
intel_device_info_update_cs_workgroup_threads(struct intel_device_info * devinfo)1411 intel_device_info_update_cs_workgroup_threads(struct intel_device_info *devinfo)
1412 {
1413    /* GPGPU_WALKER::ThreadWidthCounterMaximum is U6-1 so the most threads we
1414     * can program is 64 without going up to a rectangular group. This only
1415     * impacts Haswell and TGL which have higher thread counts.
1416     *
1417     * INTERFACE_DESCRIPTOR_DATA::NumberofThreadsinGPGPUThreadGroup on Xe-HP+
1418     * is 10 bits so we have no such restrictions.
1419     */
1420    devinfo->max_cs_workgroup_threads =
1421       devinfo->verx10 >= 125 ? devinfo->max_cs_threads :
1422                                MIN2(devinfo->max_cs_threads, 64);
1423 }
1424 
/* Parse one INTEL_FORCE_PROBE entry and apply it if it matches pci_id.
 *
 * An entry is an optional leading '!' followed by either '*' (matches any
 * device) or a hexadecimal PCI ID as accepted by strtol() with base 16.
 * Anything left over after that makes the entry invalid.
 *
 * On a match, *force_on is set to true and *force_off to false for a plain
 * entry, and the other way round for a '!' entry. When the entry is invalid
 * or does not match, neither output is written.
 *
 * Returns true if the entry is valid and matches pci_id.
 */
static bool
parse_force_probe_entry(int pci_id, const char *entry, bool *force_on,
                        bool *force_off)
{
   const char *cp = entry;

   bool negated = *cp == '!';
   if (negated)
      cp++;

   if (*cp == '\0')
      return false;

   bool wildcard = *cp == '*';
   long val = 0;

   if (wildcard) {
      cp++;
   } else {
      char *end;
      val = strtol(cp, &end, 16);
      /* No hex digits consumed at all. */
      if (end == cp)
         return false;
      cp = end;
   }

   /* Reject trailing characters after the ID or wildcard. */
   if (*cp != '\0')
      return false;

   bool matched = wildcard || (long)pci_id == val;
   if (matched) {
      *force_on = !negated;
      *force_off = negated;
   }

   return matched;
}
1462 
/* Evaluate the INTEL_FORCE_PROBE environment variable for pci_id.
 *
 * The variable holds a comma-separated list of entries in the format
 * understood by parse_force_probe_entry(). Entries are applied in order, so
 * a later matching entry overrides an earlier one. Both outputs are false
 * when the variable is unset or empty, when duplicating it fails, or when no
 * entry matches.
 */
static void
scan_for_force_probe(int pci_id, bool *force_on, bool *force_off)
{
   *force_on = false;
   *force_off = false;

   const char *env = getenv("INTEL_FORCE_PROBE");
   if (env == NULL)
      return;

   size_t len = strlen(env);
   if (len == 0)
      return;

   /* Work on a private copy: the list is split in place below. */
   char *dup = strndup(env, len);
   if (dup == NULL)
      return;

   /* Split on ',' by hand instead of using strtok(), which keeps hidden
    * static state and is therefore not safe if another thread tokenizes at
    * the same time. Empty entries are rejected by the parser, so they are
    * ignored just as strtok() would skip them.
    */
   char *entry = dup;
   while (entry != NULL) {
      char *comma = strchr(entry, ',');
      if (comma != NULL)
         *comma = '\0';

      parse_force_probe_entry(pci_id, entry, force_on, force_off);
      entry = comma != NULL ? comma + 1 : NULL;
   }

   free(dup);
   assert(!*force_on || !*force_off);
}
1487 
/* Per-PCI-ID options collected from the trailing arguments of a CHIPSET()
 * entry while intel_device_info_init_common() looks up the device.
 */
struct device_init_config {
   /* When set, the device is only accepted if probing is forced. */
   bool require_force_probe;
};
1491 
/* Designated initializer for struct device_init_config that marks a PCI ID
 * as requiring a forced probe.
 *
 * Example PCI ID entry using FORCE_PROBE:
 *
 * CHIPSET(0x1234, foo, "FOO", "Intel(R) Graphics", FORCE_PROBE)
 */
#define FORCE_PROBE .require_force_probe = true
1497 
1498 static bool
intel_device_info_init_common(int pci_id,bool building,struct intel_device_info * devinfo)1499 intel_device_info_init_common(int pci_id, bool building,
1500                               struct intel_device_info *devinfo)
1501 {
1502    struct device_init_config device_config = { 0 };
1503    switch (pci_id) {
1504 #undef CHIPSET
1505 #define CHIPSET(id, family, fam_str, name, ...)                         \
1506       case id:                                                          \
1507          *devinfo = intel_device_info_##family;                         \
1508          device_config = *&(struct device_init_config) { __VA_ARGS__ }; \
1509          break;
1510 #include "pci_ids/crocus_pci_ids.h"
1511 #include "pci_ids/iris_pci_ids.h"
1512 
1513 #undef CHIPSET
1514 #define CHIPSET(id, fam_str, name) \
1515       case id: *devinfo = intel_device_info_gfx3; break;
1516 #include "pci_ids/i915_pci_ids.h"
1517 
1518    default:
1519       mesa_logw("Driver does not support the 0x%x PCI ID.", pci_id);
1520       return false;
1521    }
1522 
1523    switch (pci_id) {
1524 #undef CHIPSET
1525 #define CHIPSET(_id, _family, _fam_str, _name, ...) \
1526    case _id: \
1527       /* sizeof(str_literal) includes the null */ \
1528       STATIC_ASSERT(sizeof(_name) + sizeof(_fam_str) + 2 <= \
1529                     sizeof(devinfo->name)); \
1530       strncpy(devinfo->name, _name " (" _fam_str ")", sizeof(devinfo->name)); \
1531       break;
1532 #include "pci_ids/crocus_pci_ids.h"
1533 #include "pci_ids/iris_pci_ids.h"
1534    default:
1535       strncpy(devinfo->name, "Intel Unknown", sizeof(devinfo->name));
1536    }
1537 
1538    bool force_on = false;
1539    bool force_off = false;
1540    if (building)
1541       force_on = true;
1542    else
1543       scan_for_force_probe(pci_id, &force_on, &force_off);
1544    devinfo->probe_forced = force_on;
1545    if (force_off) {
1546       mesa_logw("%s (0x%x) disabled with INTEL_FORCE_PROBE", devinfo->name,
1547                 pci_id);
1548       return false;
1549    } else if (device_config.require_force_probe) {
1550       if (force_on) {
1551          if (!building)
1552             mesa_logw("Forcing probe of unsupported: %s (0x%x)", devinfo->name,
1553                       pci_id);
1554       } else {
1555          mesa_loge("%s (0x%x) requires INTEL_FORCE_PROBE", devinfo->name,
1556                    pci_id);
1557          return false;
1558       }
1559    }
1560 
1561    devinfo->pci_device_id = pci_id;
1562 
1563    fill_masks(devinfo);
1564 
1565    /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer:
1566     *
1567     * "Scratch Space per slice is computed based on 4 sub-slices.  SW must
1568     *  allocate scratch space enough so that each slice has 4 slices allowed."
1569     *
1570     * The equivalent internal documentation says that this programming note
1571     * applies to all Gfx9+ platforms.
1572     *
1573     * The hardware typically calculates the scratch space pointer by taking
1574     * the base address, and adding per-thread-scratch-space * thread ID.
1575     * Extra padding can be necessary depending how the thread IDs are
1576     * calculated for a particular shader stage.
1577     */
1578 
1579    switch(devinfo->ver) {
1580    case 9:
1581       devinfo->max_wm_threads = 64 /* threads-per-PSD */
1582                               * devinfo->num_slices
1583                               * 4; /* effective subslices per slice */
1584       break;
1585    case 11:
1586    case 12:
1587    case 20:
1588       devinfo->max_wm_threads = 128 /* threads-per-PSD */
1589                               * devinfo->num_slices
1590                               * 8; /* subslices per slice */
1591       break;
1592    default:
1593       assert(devinfo->ver < 9);
1594       break;
1595    }
1596 
1597    assert(devinfo->num_slices <= ARRAY_SIZE(devinfo->num_subslices));
1598 
1599    if (devinfo->verx10 == 0)
1600       devinfo->verx10 = devinfo->ver * 10;
1601 
1602    uint16_t major = devinfo->ver;
1603    uint16_t minor = (devinfo->verx10 - (devinfo->ver * 10)) * 10;
1604    /* When supported gfx_ip_ver will be overwritten by values read from KMD.
1605     * This is a approximation for platforms that do not support GMD ID or
1606     * when running offline tools.
1607     * verx10 125 becomes GFX_IP_VER(12, 50) for example.
1608     */
1609    devinfo->gfx_ip_ver = GFX_IP_VER(major, minor);
1610 
1611    if (devinfo->has_mesh_shading) {
1612       /* Half of push constant space matches the size used in the simplest
1613        * primitive pipeline (VS + FS). Tweaking this affects performance.
1614        */
1615       devinfo->mesh_max_constant_urb_size_kb =
1616             devinfo->max_constant_urb_size_kb / 2;
1617    }
1618 
1619    /*
1620     * Gfx 12.5 moved scratch to a surface and SURFTYPE_SCRATCH has this pitch
1621     * restriction:
1622     *
1623     * BSpec 43862 (r52666)
1624     * RENDER_SURFACE_STATE::Surface Pitch
1625     *    For surfaces of type SURFTYPE_SCRATCH, valid range of pitch is:
1626     *    [63,262143] -> [64B, 256KB]
1627     *
1628     * The pitch of the surface is the scratch size per thread and the surface
1629     * should be large enough to accommodate every physical thread.
1630     */
1631    devinfo->max_scratch_size_per_thread = devinfo->verx10 >= 125 ?
1632                                           (256 * 1024) : (2 * 1024 * 1024);
1633    intel_device_info_update_cs_workgroup_threads(devinfo);
1634 
1635    return true;
1636 }
1637 
1638 static void
intel_device_info_apply_workarounds(struct intel_device_info * devinfo)1639 intel_device_info_apply_workarounds(struct intel_device_info *devinfo)
1640 {
1641    if (intel_needs_workaround(devinfo, 18012660806))
1642       devinfo->urb.max_entries[MESA_SHADER_GEOMETRY] = 1536;
1643 
1644    /* Fixes issues with:
1645     * dEQP-GLES31.functional.geometry_shading.layered.render_with_default_layer_cubemap
1646     * when running on GFX12 platforms with small EU count.
1647     */
1648    const uint32_t eu_total = intel_device_info_eu_total(devinfo);
1649    if (devinfo->verx10 == 120 && eu_total <= 32)
1650       devinfo->urb.max_entries[MESA_SHADER_GEOMETRY] = 1024;
1651 }
1652 
1653 static bool
intel_get_device_info_from_pci_id_common(int pci_id,bool building,struct intel_device_info * devinfo)1654 intel_get_device_info_from_pci_id_common(int pci_id, bool building,
1655                                          struct intel_device_info *devinfo)
1656 {
1657    intel_device_info_init_common(pci_id, building, devinfo);
1658 
1659    /* This is a placeholder until a proper value is set. */
1660    devinfo->kmd_type = INTEL_KMD_TYPE_I915;
1661 
1662    intel_device_info_init_was(devinfo);
1663    intel_device_info_apply_workarounds(devinfo);
1664 
1665    return true;
1666 }
1667 
/* Fill devinfo from a PCI id, taking the shared PCI-id path with its
 * "building" flag cleared. Returns whatever that shared path returns.
 */
bool
intel_get_device_info_from_pci_id(int pci_id,
                                  struct intel_device_info *devinfo)
{
   const bool building = false;

   return intel_get_device_info_from_pci_id_common(pci_id, building, devinfo);
}
1674 
/* Fill devinfo from a PCI id, taking the shared PCI-id path with its
 * "building" flag set. Returns whatever that shared path returns.
 */
bool
intel_get_device_info_for_build(int pci_id,
                                struct intel_device_info *devinfo)
{
   const bool building = true;

   return intel_get_device_info_from_pci_id_common(pci_id, building, devinfo);
}
1681 
1682 bool
intel_device_info_compute_system_memory(struct intel_device_info * devinfo,bool update)1683 intel_device_info_compute_system_memory(struct intel_device_info *devinfo, bool update)
1684 {
1685    if (!update) {
1686       if (!os_get_total_physical_memory(&devinfo->mem.sram.mappable.size))
1687          return false;
1688    }
1689 
1690    os_get_available_system_memory(&devinfo->mem.sram.mappable.free);
1691 
1692    return true;
1693 }
1694 
1695 static void
intel_device_info_adjust_memory(struct intel_device_info * devinfo)1696 intel_device_info_adjust_memory(struct intel_device_info *devinfo)
1697 {
1698    uint64_t available;
1699 
1700    /* Applications running without elevated privileges don't report valid
1701     * numbers for free sram
1702     */
1703    if (os_get_available_system_memory(&available)) {
1704       devinfo->mem.sram.mappable.free = MIN3(devinfo->mem.sram.mappable.free,
1705                                              devinfo->mem.sram.mappable.size,
1706                                              available);
1707    }
1708 }
1709 
1710 static void
init_max_scratch_ids(struct intel_device_info * devinfo)1711 init_max_scratch_ids(struct intel_device_info *devinfo)
1712 {
1713    /* Determine the max number of subslices that potentially might be used in
1714     * scratch space ids.
1715     *
1716     * For, Gfx11+, scratch space allocation is based on the number of threads
1717     * in the base configuration.
1718     *
1719     * For Gfx9, devinfo->subslice_total is the TOTAL number of subslices and
1720     * we wish to view that there are 4 subslices per slice instead of the
1721     * actual number of subslices per slice. The documentation for 3DSTATE_PS
1722     * "Scratch Space Base Pointer" says:
1723     *
1724     *    "Scratch Space per slice is computed based on 4 sub-slices.  SW
1725     *     must allocate scratch space enough so that each slice has 4
1726     *     slices allowed."
1727     *
1728     * According to the other driver team, this applies to compute shaders
1729     * as well.  This is not currently documented at all.
1730     *
1731     * For Gfx8 and older we user devinfo->subslice_total.
1732     */
1733    unsigned subslices;
1734    if (devinfo->verx10 == 125)
1735       subslices = 32;
1736    else if (devinfo->ver == 12)
1737       subslices = (devinfo->platform == INTEL_PLATFORM_DG1 || devinfo->gt == 2 ? 6 : 2);
1738    else if (devinfo->ver == 11)
1739       subslices = 8;
1740    else if (devinfo->ver >= 9 && devinfo->ver < 11)
1741       subslices = 4 * devinfo->num_slices;
1742    else
1743       subslices = devinfo->subslice_total;
1744    assert(subslices >= devinfo->subslice_total);
1745 
1746    unsigned scratch_ids_per_subslice;
1747    if (devinfo->ver >= 12) {
1748       /* Same as ICL below, but with 16 EUs. */
1749       scratch_ids_per_subslice = 16 * 8;
1750    } else if (devinfo->ver >= 11) {
1751       /* The MEDIA_VFE_STATE docs say:
1752        *
1753        *    "Starting with this configuration, the Maximum Number of
1754        *     Threads must be set to (#EU * 8) for GPGPU dispatches.
1755        *
1756        *     Although there are only 7 threads per EU in the configuration,
1757        *     the FFTID is calculated as if there are 8 threads per EU,
1758        *     which in turn requires a larger amount of Scratch Space to be
1759        *     allocated by the driver."
1760        */
1761       scratch_ids_per_subslice = 8 * 8;
1762    } else if (devinfo->platform == INTEL_PLATFORM_HSW) {
1763       /* WaCSScratchSize:hsw
1764        *
1765        * Haswell's scratch space address calculation appears to be sparse
1766        * rather than tightly packed. The Thread ID has bits indicating
1767        * which subslice, EU within a subslice, and thread within an EU it
1768        * is. There's a maximum of two slices and two subslices, so these
1769        * can be stored with a single bit. Even though there are only 10 EUs
1770        * per subslice, this is stored in 4 bits, so there's an effective
1771        * maximum value of 16 EUs. Similarly, although there are only 7
1772        * threads per EU, this is stored in a 3 bit number, giving an
1773        * effective maximum value of 8 threads per EU.
1774        *
1775        * This means that we need to use 16 * 8 instead of 10 * 7 for the
1776        * number of threads per subslice.
1777        */
1778       scratch_ids_per_subslice = 16 * 8;
1779    } else if (devinfo->platform == INTEL_PLATFORM_CHV) {
1780       /* Cherryview devices have either 6 or 8 EUs per subslice, and each
1781        * EU has 7 threads. The 6 EU devices appear to calculate thread IDs
1782        * as if it had 8 EUs.
1783        */
1784       scratch_ids_per_subslice = 8 * 7;
1785    } else {
1786       scratch_ids_per_subslice = devinfo->max_cs_threads;
1787    }
1788 
1789    unsigned max_thread_ids = scratch_ids_per_subslice * subslices;
1790 
1791    if (devinfo->verx10 >= 125) {
1792       /* On GFX version 12.5, scratch access changed to a surface-based model.
1793        * Instead of each shader type having its own layout based on IDs passed
1794        * from the relevant fixed-function unit, all scratch access is based on
1795        * thread IDs like it always has been for compute.
1796        */
1797       for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_STAGES; i++)
1798          devinfo->max_scratch_ids[i] = max_thread_ids;
1799    } else {
1800       unsigned max_scratch_ids[] = {
1801          [MESA_SHADER_VERTEX]    = devinfo->max_vs_threads,
1802          [MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads,
1803          [MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
1804          [MESA_SHADER_GEOMETRY]  = devinfo->max_gs_threads,
1805          [MESA_SHADER_FRAGMENT]  = devinfo->max_wm_threads,
1806          [MESA_SHADER_COMPUTE]   = max_thread_ids,
1807       };
1808       STATIC_ASSERT(sizeof(devinfo->max_scratch_ids) == sizeof(max_scratch_ids));
1809       memcpy(devinfo->max_scratch_ids, max_scratch_ids,
1810              sizeof(devinfo->max_scratch_ids));
1811    }
1812 }
1813 
1814 static unsigned
intel_device_info_calc_engine_prefetch(const struct intel_device_info * devinfo,enum intel_engine_class engine_class)1815 intel_device_info_calc_engine_prefetch(const struct intel_device_info *devinfo,
1816                                        enum intel_engine_class engine_class)
1817 {
1818    if (devinfo->verx10 >= 200) {
1819       switch (engine_class) {
1820       case INTEL_ENGINE_CLASS_RENDER:
1821          return 4096;
1822       case INTEL_ENGINE_CLASS_COMPUTE:
1823          return 1024;
1824       default:
1825          return 512;
1826       }
1827    }
1828 
1829    if (intel_device_info_is_mtl_or_arl(devinfo)) {
1830       switch (engine_class) {
1831       case INTEL_ENGINE_CLASS_RENDER:
1832          return 2048;
1833       case INTEL_ENGINE_CLASS_COMPUTE:
1834          return 1024;
1835       default:
1836          return 512;
1837       }
1838    }
1839 
1840    /* DG2 */
1841    if (devinfo->verx10 == 125)
1842       return 1024;
1843 
1844    /* Older than DG2/MTL */
1845    return 512;
1846 }
1847 
1848 bool
intel_get_device_info_from_fd(int fd,struct intel_device_info * devinfo,int min_ver,int max_ver)1849 intel_get_device_info_from_fd(int fd, struct intel_device_info *devinfo, int min_ver, int max_ver)
1850 {
1851    if (NULL != getenv("INTEL_STUB_GPU_JSON")) {
1852       /* This call will succeed when shim-drm has been initialized with a
1853        * serialized intel_device_info structure.
1854        */
1855       struct drm_intel_stub_devinfo arg = {
1856          .addr = (uintptr_t)devinfo,
1857          .size = sizeof(*devinfo),
1858       };
1859       if (0 == intel_ioctl(fd, DRM_IOCTL_INTEL_STUB_DEVINFO, &arg)) {
1860          intel_device_info_init_was(devinfo);
1861          intel_device_info_apply_workarounds(devinfo);
1862          return true;
1863       }
1864    }
1865 
1866    /* Get PCI info.
1867     *
1868     * Some callers may already have a valid drm device which holds values of
1869     * PCI fields queried here prior to calling this function. But making this
1870     * query optional leads to a more cumbersome implementation. These callers
1871     * still need to initialize the fields somewhere out of this function and
1872     * rely on an ioctl to get PCI device id for the next step when skipping
1873     * this drm query.
1874     */
1875    drmDevicePtr drmdev = NULL;
1876    if (drmGetDevice2(fd, DRM_DEVICE_GET_PCI_REVISION, &drmdev)) {
1877       mesa_loge("Failed to query drm device.");
1878       return false;
1879    }
1880    if (!intel_device_info_init_common(drmdev->deviceinfo.pci->device_id,
1881                                       false, devinfo)) {
1882       drmFreeDevice(&drmdev);
1883       return false;
1884    }
1885 
1886    if ((min_ver > 0 && devinfo->ver < min_ver) || (max_ver > 0 && devinfo->ver > max_ver)) {
1887       drmFreeDevice(&drmdev);
1888       return false;
1889    }
1890 
1891    devinfo->pci_domain = drmdev->businfo.pci->domain;
1892    devinfo->pci_bus = drmdev->businfo.pci->bus;
1893    devinfo->pci_dev = drmdev->businfo.pci->dev;
1894    devinfo->pci_func = drmdev->businfo.pci->func;
1895    devinfo->pci_device_id = drmdev->deviceinfo.pci->device_id;
1896    devinfo->pci_revision_id = drmdev->deviceinfo.pci->revision_id;
1897    drmFreeDevice(&drmdev);
1898    devinfo->no_hw = debug_get_bool_option("INTEL_NO_HW", false);
1899 
1900    devinfo->kmd_type = intel_get_kmd_type(fd);
1901    if (devinfo->kmd_type == INTEL_KMD_TYPE_INVALID) {
1902       mesa_loge("Unknown kernel mode driver");
1903       return false;
1904    }
1905 
1906    /* remaining initialization queries the kernel for device info */
1907    if (devinfo->no_hw) {
1908       /* Provide some sensible values for NO_HW. */
1909       devinfo->gtt_size =
1910          devinfo->ver >= 8 ? (1ull << 48) : 2ull * 1024 * 1024 * 1024;
1911       intel_device_info_compute_system_memory(devinfo, false);
1912       return true;
1913    }
1914 
1915    bool ret;
1916    switch (devinfo->kmd_type) {
1917    case INTEL_KMD_TYPE_I915:
1918       ret = intel_device_info_i915_get_info_from_fd(fd, devinfo);
1919       break;
1920    case INTEL_KMD_TYPE_XE:
1921       ret = intel_device_info_xe_get_info_from_fd(fd, devinfo);
1922       if (devinfo->verx10 < 200)
1923          mesa_logw("Support for this platform is experimental with Xe KMD, bug reports may be ignored.");
1924       break;
1925    default:
1926       ret = false;
1927       unreachable("Missing");
1928    }
1929    if (!ret) {
1930       mesa_logw("Could not get intel_device_info.");
1931       return false;
1932    }
1933 
1934    /* region info is required for lmem support */
1935    if (devinfo->has_local_mem && !devinfo->mem.use_class_instance) {
1936       mesa_logw("Could not query local memory size.");
1937       return false;
1938    }
1939 
1940    intel_device_info_adjust_memory(devinfo);
1941 
1942    /* Gfx7 and older do not support EU/Subslice info */
1943    assert(devinfo->subslice_total >= 1 || devinfo->ver <= 7);
1944    devinfo->subslice_total = MAX2(devinfo->subslice_total, 1);
1945 
1946    init_max_scratch_ids(devinfo);
1947 
1948    for (enum intel_engine_class engine = INTEL_ENGINE_CLASS_RENDER;
1949         engine < ARRAY_SIZE(devinfo->engine_class_prefetch); engine++)
1950       devinfo->engine_class_prefetch[engine] =
1951             intel_device_info_calc_engine_prefetch(devinfo, engine);
1952 
1953    intel_device_info_init_was(devinfo);
1954    intel_device_info_apply_workarounds(devinfo);
1955 
1956    return true;
1957 }
1958 
intel_device_info_update_memory_info(struct intel_device_info * devinfo,int fd)1959 bool intel_device_info_update_memory_info(struct intel_device_info *devinfo, int fd)
1960 {
1961    bool ret;
1962 
1963    switch (devinfo->kmd_type) {
1964    case INTEL_KMD_TYPE_I915:
1965       ret = intel_device_info_i915_query_regions(devinfo, fd, true);
1966       break;
1967    case INTEL_KMD_TYPE_XE:
1968       ret = intel_device_info_xe_query_regions(fd, devinfo, true);
1969       break;
1970    default:
1971       ret = false;
1972    }
1973 
1974    if (ret)
1975       intel_device_info_adjust_memory(devinfo);
1976    return ret;
1977 }
1978 
1979 void
intel_device_info_update_after_hwconfig(struct intel_device_info * devinfo)1980 intel_device_info_update_after_hwconfig(struct intel_device_info *devinfo)
1981 {
1982    /* After applying hwconfig values, some items need to be recalculated. */
1983    devinfo->max_cs_threads =
1984       devinfo->max_eus_per_subslice * devinfo->num_thread_per_eu;
1985 
1986    intel_device_info_update_cs_workgroup_threads(devinfo);
1987 }
1988 
1989 enum intel_wa_steppings
intel_device_info_wa_stepping(struct intel_device_info * devinfo)1990 intel_device_info_wa_stepping(struct intel_device_info *devinfo)
1991 {
1992    /* When adding platforms to this function, check to see if
1993     * stepping-specific workarounds impact the compiler.
1994     *
1995     * If a stepping specific compiler workaround is required on a released
1996     * platform, intel_device_info->revision must be added as a
1997     * 'compiler_field' in intel_device_info.py
1998     */
1999 
2000    if (devinfo->platform == INTEL_PLATFORM_BMG) {
2001       switch (devinfo->revision) {
2002       case 0:
2003          return INTEL_STEPPING_A0;
2004       case 1:
2005          return INTEL_STEPPING_A1;
2006       case 4:
2007          return INTEL_STEPPING_B0;
2008       default:
2009          return INTEL_STEPPING_RELEASE;
2010       }
2011    } else if (devinfo->platform == INTEL_PLATFORM_LNL) {
2012       switch (devinfo->revision) {
2013       case 0:
2014          return INTEL_STEPPING_A0;
2015       case 1:
2016          return INTEL_STEPPING_A1;
2017       case 4:
2018          return INTEL_STEPPING_B0;
2019       default:
2020          return INTEL_STEPPING_RELEASE;
2021       }
2022    } else if (devinfo->platform == INTEL_PLATFORM_TGL) {
2023       /* TGL production steppings: B0 and C0 */
2024       switch (devinfo->revision) {
2025       case 1:
2026          return INTEL_STEPPING_B0;
2027       case 3:
2028          return INTEL_STEPPING_C0;
2029       default:
2030          return INTEL_STEPPING_RELEASE;
2031       }
2032    }
2033 
2034    /* all other platforms support only released steppings */
2035    return INTEL_STEPPING_RELEASE;
2036 }
2037 
2038 uint32_t
intel_device_info_get_max_slm_size(const struct intel_device_info * devinfo)2039 intel_device_info_get_max_slm_size(const struct intel_device_info *devinfo)
2040 {
2041    uint32_t bytes = 0;
2042 
2043    if (devinfo->verx10 >= 200) {
2044       bytes = intel_device_info_get_max_preferred_slm_size(devinfo);
2045    } else {
2046       bytes = 64 * 1024;
2047    }
2048 
2049    return bytes;
2050 }
2051 
2052 uint32_t
intel_device_info_get_max_preferred_slm_size(const struct intel_device_info * devinfo)2053 intel_device_info_get_max_preferred_slm_size(const struct intel_device_info *devinfo)
2054 {
2055    uint32_t k_bytes = 0;
2056 
2057    if (devinfo->verx10 >= 200) {
2058       if (intel_needs_workaround(devinfo, 16018610683))
2059          k_bytes = 128;
2060       else
2061          k_bytes = 160;
2062    } else {
2063       k_bytes = 128;
2064    }
2065 
2066    return k_bytes * 1024;
2067 }
2068