xref: /aosp_15_r20/external/mesa3d/src/freedreno/common/freedreno_devices.py (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1#
2# Copyright © 2021 Google, Inc.
3#
4# SPDX-License-Identifier: MIT
5
6from mako.template import Template
7import sys
8import argparse
9from enum import Enum
10
def max_bitfield_val(high, low, shift):
    """Return a run of (high - low) one-bits, shifted left by `shift`.

    Used to derive tile size limits from register bitfield positions.

    NOTE(review): the run is `high - low` bits wide, not `high - low + 1`
    as an inclusive [low, high] bit range would be.  Values in the tables
    of this file (e.g. the "# 1008" note for (14, 8, 4)) match the current
    result, so confirm the intent before changing the width.
    """
    width = high - low
    all_ones = (1 << width) - 1
    return all_ones << shift
13
14
# Command-line handling: the only (mandatory) option is a directory that is
# pushed to the front of sys.path.  This has to happen before the
# `from a6xx import *` below -- presumably that directory is where the
# a6xx module lives.
parser = argparse.ArgumentParser()
parser.add_argument('-p', '--import-path', required=True)
args = parser.parse_args()
sys.path.insert(0, args.import_path)
19
20from a6xx import *
21
22
class CHIP(Enum):
    """Adreno GPU generations; each member's value is the generation number.

    GPUInfo stores the member's integer ``.value`` rather than the enum
    member itself.
    """
    A2XX = 2
    A3XX = 3
    A4XX = 4
    A5XX = 5
    A6XX = 6
    A7XX = 7
30
class CCUColorCacheFraction(Enum):
    """Choices for the ``gmem_ccu_color_cache_fraction`` property.

    The property templates in this file store the member's integer
    ``.value``, not the member itself.

    NOTE(review): judging by the names, the members select which fraction
    of the CCU color cache is used in GMEM mode -- confirm against the
    consumer of the generated table.
    """
    FULL = 0
    HALF = 1
    QUARTER = 2
    EIGHTH = 3
36
37
class State(object):
    """Registry of device-info structs and of the GPU ids that map to them."""

    def __init__(self):
        # List of unique device-info structs, multiple different GPU ids
        # can map to a single info struct in cases where the differences
        # are not sw visible, or the only differences are parameters
        # queried from the kernel (like GMEM size)
        self.gpu_infos = []

        # Table mapping GPU id to device-info struct
        self.gpus = {}

    def info_index(self, gpu_info):
        """Return the position of `gpu_info` within `self.gpu_infos`.

        Entries are compared with ``==`` and the first match wins.

        Raises:
            ValueError: if `gpu_info` is not in `self.gpu_infos`.
        """
        for index, info in enumerate(self.gpu_infos):
            if gpu_info == info:
                return index
        # `Error` is not a defined name in this module, so raising it would
        # itself fail with NameError; use a real built-in exception instead.
        raise ValueError("invalid info")
56
# Module-wide registry; GPUInfo instances append themselves to it and
# add_gpus() fills in its id -> info table.
s = State()

def add_gpus(ids, info):
    """Point every GPU id in `ids` at the same device-info struct `info`.

    The mapping is recorded in the module-level state's `gpus` table; an id
    that is already a key there is re-pointed at `info`.
    """
    s.gpus.update((gpu_id, info) for gpu_id in ids)
62
class GPUId(object):
    """Identity of one GPU: numeric gpu_id, chip_id and a display name.

    Args:
        gpu_id: decimal id such as 630.  Required when `chip_id` is not
            given; stored as 0 when omitted.
        chip_id: explicit chip id.  When omitted it is derived from the
            decimal digits of `gpu_id`.
        name: display name.  Defaults to "FD<gpu_id>", which requires a
            non-zero `gpu_id`.
    """
    def __init__(self, gpu_id = None, chip_id = None, name=None):
        if chip_id is None:
            assert gpu_id is not None
            # Split the id into hundreds / tens / ones (e.g. 630 -> 6, 3, 0)
            # using integer arithmetic, then pack the parts one per byte with
            # the lowest byte fixed to 0xff.
            core, remainder = divmod(gpu_id, 100)
            major, minor = divmod(remainder, 10)
            chip_id = (core << 24) | (major << 16) | (minor << 8) | 0xff
        self.chip_id = chip_id
        if gpu_id is None:
            gpu_id = 0
        self.gpu_id = gpu_id
        if name is None:
            # Without a gpu_id there is nothing to build a default name from
            assert gpu_id != 0
            name = "FD%d" % gpu_id
        self.name = name
82
class Struct(object):
    """Helper base whose str() form is a 'C' designated struct initializer.

    Every instance attribute becomes a ".name=value," element, in the order
    the attributes were assigned; values are rendered with str().
    """
    def __str__(self):
        fields = [".%s=%s," % (attr, str(val))
                  for attr, val in vars(self).items()]
        return "{" + "".join(fields) + "}"
91
class GPUInfo(Struct):
    """Base class for any generation of adreno, consists of GMEM layout
       related parameters

       Note that tile_max_h is normally only constrained by corresponding
       bitfield size/shift (ie. VSC_BIN_SIZE, or similar), but tile_max_w
       tends to have lower limits, in which case a comment will describe
       the bitfield size/shift

       Constructing an instance also appends it to the module-level
       state's gpu_infos list.  Attributes are assigned in a fixed order,
       which is the order Struct.__str__ emits them in.
    """
    def __init__(self, chip, gmem_align_w, gmem_align_h,
                 tile_align_w, tile_align_h,
                 tile_max_w, tile_max_h, num_vsc_pipes,
                 cs_shared_mem_size, num_sp_cores, wave_granularity, fibers_per_sp,
                 threadsize_base = 64, max_waves = 16):
        # `chip` is a CHIP member; only its integer value is stored
        self.chip          = chip.value
        self.gmem_align_w  = gmem_align_w
        self.gmem_align_h  = gmem_align_h
        self.tile_align_w  = tile_align_w
        self.tile_align_h  = tile_align_h
        self.tile_max_w    = tile_max_w
        self.tile_max_h    = tile_max_h
        self.num_vsc_pipes = num_vsc_pipes
        self.cs_shared_mem_size = cs_shared_mem_size
        self.num_sp_cores  = num_sp_cores
        self.wave_granularity = wave_granularity
        self.fibers_per_sp = fibers_per_sp
        self.threadsize_base = threadsize_base
        self.max_waves     = max_waves

        # Register this struct in the global list of device-info structs
        s.gpu_infos.append(self)
122
123
class A6xxGPUInfo(GPUInfo):
    """The a6xx generation has a lot more parameters, and is broken down
       into distinct sub-generations.  The template parameter avoids
       duplication of parameters that are unique to the sub-generation.

       Args:
           chip: CHIP member; CHIP.A6XX selects the a6xx tile limits, any
               other value selects 1728x1728.
           template: a single props object, or a list of them, each
               providing apply_props(gpu_info).  They are applied in list
               order, so a later template overrides keys set by an earlier
               one.
           num_ccu: number of CCUs; also passed to the base class as
               num_sp_cores.
           magic_regs: dict whose items become attributes of a6xx.magic.
           raw_magic_regs: optional list of [register, value] pairs stored
               as a6xx.magic_raw (the register is converted with int()).
           The remaining arguments are forwarded unchanged to GPUInfo.
    """
    def __init__(self, chip, template, num_ccu,
                 tile_align_w, tile_align_h, num_vsc_pipes,
                 cs_shared_mem_size, wave_granularity, fibers_per_sp,
                 magic_regs, raw_magic_regs = None, threadsize_base = 64,
                 max_waves = 16):
        if chip == CHIP.A6XX:
            tile_max_w   = 1024 # max_bitfield_val(5, 0, 5)
            tile_max_h   = max_bitfield_val(14, 8, 4) # 1008
        else:
            tile_max_w   = 1728
            tile_max_h   = 1728

        super().__init__(chip, gmem_align_w = 16, gmem_align_h = 4,
                         tile_align_w = tile_align_w,
                         tile_align_h = tile_align_h,
                         tile_max_w   = tile_max_w,
                         tile_max_h   = tile_max_h,
                         num_vsc_pipes = num_vsc_pipes,
                         cs_shared_mem_size = cs_shared_mem_size,
                         num_sp_cores = num_ccu, # The # of SP cores seems to always match # of CCU
                         wave_granularity   = wave_granularity,
                         fibers_per_sp      = fibers_per_sp,
                         threadsize_base    = threadsize_base,
                         max_waves    = max_waves)

        self.num_ccu = num_ccu

        # Per-generation sub-structs that the props templates fill in
        self.a6xx = Struct()
        self.a7xx = Struct()

        self.a6xx.magic = Struct()

        for name, val in magic_regs.items():
            setattr(self.a6xx.magic, name, val)

        # magic_raw is only emitted when raw registers were supplied
        if raw_magic_regs:
            self.a6xx.magic_raw = [[int(r[0]), r[1]] for r in raw_magic_regs]

        # Accept either one template or a list of them
        templates = template if isinstance(template, list) else [template]
        for props in templates:
            props.apply_props(self)


    def __str__(self):
        # Python lists (magic_raw) stringify with square brackets; turn them
        # into braces so they read as nested C initializers.
        return super().__str__().replace('[', '{').replace("]", "}")
174
175
# a2xx is really two sub-generations, a20x and a22x, but we don't currently
# capture that in the device-info tables
#
# All four ids below share one GPUInfo struct.
add_gpus([
        GPUId(200),
        GPUId(201),
        GPUId(205),
        GPUId(220),
    ], GPUInfo(
        CHIP.A2XX,
        gmem_align_w = 32,  gmem_align_h = 32,
        tile_align_w = 32,  tile_align_h = 32,
        tile_max_w   = 512,
        # ~0 evaluates to -1 in Python; placeholder until the limit is known
        tile_max_h   = ~0, # TODO
        num_vsc_pipes = 8,
        cs_shared_mem_size = 0,
        num_sp_cores = 0, # TODO
        wave_granularity = 2,
        fibers_per_sp = 0, # TODO
        threadsize_base = 8, # TODO: Confirm this
    ))
196
# a3xx: ids given either as a decimal gpu_id or, for the last two, as an
# explicit chip_id with a name.  All share one GPUInfo struct.
add_gpus([
        GPUId(305),
        GPUId(307),
        GPUId(320),
        GPUId(330),
        GPUId(chip_id=0x03000512, name="FD305B"),
        GPUId(chip_id=0x03000620, name="FD306A"),
    ], GPUInfo(
        CHIP.A3XX,
        gmem_align_w = 32,  gmem_align_h = 32,
        tile_align_w = 32,  tile_align_h = 32,
        tile_max_w   = 992, # max_bitfield_val(4, 0, 5)
        # max_bitfield_val(9, 5, 5) currently evaluates to 480
        tile_max_h   = max_bitfield_val(9, 5, 5),
        num_vsc_pipes = 8,
        cs_shared_mem_size = 32 * 1024,
        num_sp_cores = 0, # TODO
        wave_granularity = 2,
        fibers_per_sp = 0, # TODO
        threadsize_base = 8,
    ))
217
# a4xx: all three ids share one GPUInfo struct.
add_gpus([
        GPUId(405),
        GPUId(420),
        GPUId(430),
    ], GPUInfo(
        CHIP.A4XX,
        gmem_align_w = 32,  gmem_align_h = 32,
        tile_align_w = 32,  tile_align_h = 32,
        tile_max_w   = 1024, # max_bitfield_val(4, 0, 5)
        # max_bitfield_val(9, 5, 5) currently evaluates to 480
        tile_max_h   = max_bitfield_val(9, 5, 5),
        num_vsc_pipes = 8,
        cs_shared_mem_size = 32 * 1024,
        num_sp_cores = 0, # TODO
        wave_granularity = 2,
        fibers_per_sp = 0, # TODO
        threadsize_base = 32, # TODO: Confirm this
    ))
235
# a5xx parts with num_sp_cores = 1.  The three a5xx entries in this file
# are identical except for num_sp_cores (1, 2 and 4).
add_gpus([
        GPUId(505),
        GPUId(506),
        GPUId(508),
        GPUId(509),
    ], GPUInfo(
        CHIP.A5XX,
        gmem_align_w = 64,  gmem_align_h = 32,
        tile_align_w = 64,  tile_align_h = 32,
        tile_max_w   = 1024, # max_bitfield_val(7, 0, 5)
        # max_bitfield_val(16, 9, 5) currently evaluates to 4064
        tile_max_h   = max_bitfield_val(16, 9, 5),
        num_vsc_pipes = 16,
        cs_shared_mem_size = 32 * 1024,
        num_sp_cores = 1,
        wave_granularity = 2,
        fibers_per_sp = 64 * 16, # Lowest number that didn't fault on spillall fs-varying-array-mat4-col-row-rd.
        threadsize_base = 32,
    ))
254
# a5xx parts with num_sp_cores = 2; otherwise the same parameters as the
# other a5xx entries in this file.
add_gpus([
        GPUId(510),
        GPUId(512),
    ], GPUInfo(
        CHIP.A5XX,
        gmem_align_w = 64,  gmem_align_h = 32,
        tile_align_w = 64,  tile_align_h = 32,
        tile_max_w   = 1024, # max_bitfield_val(7, 0, 5)
        # max_bitfield_val(16, 9, 5) currently evaluates to 4064
        tile_max_h   = max_bitfield_val(16, 9, 5),
        num_vsc_pipes = 16,
        cs_shared_mem_size = 32 * 1024,
        num_sp_cores = 2,
        wave_granularity = 2,
        fibers_per_sp = 64 * 16, # Lowest number that didn't fault on spillall fs-varying-array-mat4-col-row-rd.
        threadsize_base = 32,
    ))
271
# a5xx parts with num_sp_cores = 4; otherwise the same parameters as the
# other a5xx entries in this file.
add_gpus([
        GPUId(530),
        GPUId(540),
    ], GPUInfo(
        CHIP.A5XX,
        gmem_align_w = 64,  gmem_align_h = 32,
        tile_align_w = 64,  tile_align_h = 32,
        tile_max_w   = 1024, # max_bitfield_val(7, 0, 5)
        # max_bitfield_val(16, 9, 5) currently evaluates to 4064
        tile_max_h   = max_bitfield_val(16, 9, 5),
        num_vsc_pipes = 16,
        cs_shared_mem_size = 32 * 1024,
        num_sp_cores = 4,
        wave_granularity = 2,
        fibers_per_sp = 64 * 16, # Lowest number that didn't fault on spillall fs-varying-array-mat4-col-row-rd.
        threadsize_base = 32,
    ))
288
289
class A6XXProps(dict):
    """A dict of property name -> value that can be copied onto a GPU info.

    `unique_props` is shared by this class and its subclasses; it records
    the most recent value applied for every (name, generation) pair.
    """
    unique_props = {}

    def apply_gen_props(self, gen, gpu_info):
        """Set each property on the `gen` sub-struct of `gpu_info`.

        `gen` is the attribute name of the sub-struct (e.g. "a6xx").  Every
        applied property is also recorded in `A6XXProps.unique_props`.
        """
        for prop, setting in self.items():
            # Looked up per property, so an empty props dict never touches
            # gpu_info at all.
            target = getattr(gpu_info, gen)
            setattr(target, prop, setting)
            A6XXProps.unique_props[(prop, gen)] = setting

    def apply_props(self, gpu_info):
        """Apply these properties to the a6xx sub-struct of `gpu_info`."""
        self.apply_gen_props("a6xx", gpu_info)
299
300
class A7XXProps(A6XXProps):
    """Property set that is applied to the a7xx sub-struct of a GPU info
    instead of the a6xx one.
    """
    def apply_props(self, gpu_info):
        # Same mechanism as the base class, only the target sub-struct differs
        self.apply_gen_props("a7xx", gpu_info)
304
305
306# Props could be modified with env var:
307#  FD_DEV_FEATURES=%feature_name%=%value%:%feature_name%=%value%:...
308# e.g.
309#  FD_DEV_FEATURES=has_fs_tex_prefetch=0:max_sets=4
310
# Defaults shared by the a6xx entries below: every a6xx add_gpus() call in
# this part of the file lists a6xx_base first, so the sub-generation
# templates that follow it in the list override individual keys.
a6xx_base = A6XXProps(
        has_cp_reg_write = True,
        has_8bpp_ubwc = True,
        has_gmem_fast_clear = True,
        has_hw_multiview = True,
        has_fs_tex_prefetch = True,
        has_sampler_minmax = True,

        supports_double_threadsize = True,

        sysmem_per_ccu_depth_cache_size = 64 * 1024,
        sysmem_per_ccu_color_cache_size = 64 * 1024,
        gmem_ccu_color_cache_fraction = CCUColorCacheFraction.QUARTER.value,

        prim_alloc_threshold = 0x7,
        vs_max_inputs_count = 32,
        max_sets = 5,
        line_width_min = 1.0,
        line_width_max = 1.0,
    )
331
332
333# a6xx can be divided into distinct sub-generations, where certain device-
334# info parameters are keyed to the sub-generation.  These templates reduce
335# the copypaste
336
# Same first four keys as a6xx_gen1 (with a smaller reg_size_vec4), plus
# overrides for a number of a6xx_base defaults.
a6xx_gen1_low = A6XXProps(
        reg_size_vec4 = 48,
        instr_cache_size = 64,
        indirect_draw_wfm_quirk = True,
        depth_bounds_require_depth_test_quirk = True,

        # Everything below replaces a value that a6xx_base also sets
        has_gmem_fast_clear = False,
        has_hw_multiview = False,
        has_sampler_minmax = False,
        has_fs_tex_prefetch = False,
        sysmem_per_ccu_color_cache_size = 8 * 1024,
        sysmem_per_ccu_depth_cache_size = 8 * 1024,
        gmem_ccu_color_cache_fraction = CCUColorCacheFraction.HALF.value,
        vs_max_inputs_count = 16,
        supports_double_threadsize = False,
    )
353
# gen1 template: adds keys only, none of them are present in a6xx_base.
a6xx_gen1 = A6XXProps(
        reg_size_vec4 = 96,
        instr_cache_size = 64,
        indirect_draw_wfm_quirk = True,
        depth_bounds_require_depth_test_quirk = True,
    )
360
# gen2 template: the gen1 keys plus a few additions; has_8bpp_ubwc
# overrides the a6xx_base default of True.
a6xx_gen2 = A6XXProps(
        reg_size_vec4 = 96,
        instr_cache_size = 64, # TODO
        supports_multiview_mask = True,
        has_z24uint_s8uint = True,
        indirect_draw_wfm_quirk = True,
        depth_bounds_require_depth_test_quirk = True, # TODO: check if true
        has_dp2acc = False, # TODO: check if true
        has_8bpp_ubwc = False,
    )
371
# gen3 template; has_8bpp_ubwc overrides the a6xx_base default of True.
# lrz_track_quirk is set here and is switched back off for the FD621 entry
# through an inline A6XXProps override.
a6xx_gen3 = A6XXProps(
        reg_size_vec4 = 64,
        # Blob limits it to 128 but we hang with 128
        instr_cache_size = 127,
        supports_multiview_mask = True,
        has_z24uint_s8uint = True,
        tess_use_shared = True,
        storage_16bit = True,
        has_tex_filter_cubic = True,
        has_separate_chroma_filter = True,
        has_sample_locations = True,
        has_8bpp_ubwc = False,
        has_dp2acc = True,
        has_lrz_dir_tracking = True,
        enable_lrz_fast_clear = True,
        lrz_track_quirk = True,
        has_lrz_feedback = True,
        has_per_view_viewport = True,
        has_scalar_alu = True,
        has_early_preamble = True,
    )
393
# gen4 template; has_cp_reg_write and has_8bpp_ubwc override the a6xx_base
# defaults of True.  Unlike a6xx_gen3 it sets neither lrz_track_quirk nor
# has_early_preamble (the latter is deliberately commented out below).
a6xx_gen4 = A6XXProps(
        reg_size_vec4 = 64,
        # Blob limits it to 128 but we hang with 128
        instr_cache_size = 127,
        supports_multiview_mask = True,
        has_z24uint_s8uint = True,
        tess_use_shared = True,
        storage_16bit = True,
        has_tex_filter_cubic = True,
        has_separate_chroma_filter = True,
        has_sample_locations = True,
        has_cp_reg_write = False,
        has_8bpp_ubwc = False,
        has_lpac = True,
        has_shading_rate = True,
        has_getfiberid = True,
        has_dp2acc = True,
        has_dp4acc = True,
        enable_lrz_fast_clear = True,
        has_lrz_dir_tracking = True,
        has_lrz_feedback = True,
        has_per_view_viewport = True,
        has_scalar_alu = True,
        has_isam_v = True,
        has_ssbo_imm_offsets = True,
        # TODO: there seems to be a quirk where at least rcp can't be in an
        # early preamble. a660 at least is affected.
        #has_early_preamble = True,
    )
423
# Extra quirk layered on top of a6xx_base + a6xx_gen4 by the FD690 entry.
a6xx_a690_quirk = A6XXProps(
        broken_ds_ubwc_quirk = True,
    )
427
# a6xx_base with the a6xx_gen1_low overrides, one CCU.  Each magic_regs
# key becomes an attribute of the entry's a6xx.magic struct.
add_gpus([
        GPUId(605), # TODO: Test it, based only on libwrapfake dumps
        GPUId(608), # TODO: Test it, based only on libwrapfake dumps
        GPUId(610),
        GPUId(612), # TODO: Test it, based only on libwrapfake dumps
    ], A6xxGPUInfo(
        CHIP.A6XX,
        [a6xx_base, a6xx_gen1_low],
        num_ccu = 1,
        tile_align_w = 32,
        tile_align_h = 16,
        num_vsc_pipes = 16,
        cs_shared_mem_size = 16 * 1024,
        wave_granularity = 1,
        fibers_per_sp = 128 * 16,
        magic_regs = dict(
            PC_POWER_CNTL = 0,
            TPL1_DBG_ECO_CNTL = 0,
            GRAS_DBG_ECO_CNTL = 0,
            SP_CHICKEN_BITS = 0,
            UCHE_CLIENT_PF = 0x00000004,
            PC_MODE_CNTL = 0xf,
            SP_DBG_ECO_CNTL = 0x0,
            RB_DBG_ECO_CNTL = 0x04100000,
            RB_DBG_ECO_CNTL_blit = 0x04100000,
            HLSQ_DBG_ECO_CNTL = 0,
            RB_UNKNOWN_8E01 = 0x00000001,
            VPC_DBG_ECO_CNTL = 0x0,
            UCHE_UNKNOWN_0E12 = 0x10000000,
        ),
    ))
459
# a6xx_base + a6xx_gen1, one CCU.
add_gpus([
        GPUId(615),
        GPUId(616),
        GPUId(618),
        GPUId(619),
    ], A6xxGPUInfo(
        CHIP.A6XX,
        [a6xx_base, a6xx_gen1],
        num_ccu = 1,
        tile_align_w = 32,
        tile_align_h = 32,
        num_vsc_pipes = 32,
        cs_shared_mem_size = 32 * 1024,
        wave_granularity = 2,
        fibers_per_sp = 128 * 16,
        magic_regs = dict(
            PC_POWER_CNTL = 0,
            TPL1_DBG_ECO_CNTL = 0x00108000,
            GRAS_DBG_ECO_CNTL = 0x00000880,
            SP_CHICKEN_BITS = 0x00000430,
            UCHE_CLIENT_PF = 0x00000004,
            PC_MODE_CNTL = 0x1f,
            SP_DBG_ECO_CNTL = 0x0,
            RB_DBG_ECO_CNTL = 0x04100000,
            RB_DBG_ECO_CNTL_blit = 0x04100000,
            HLSQ_DBG_ECO_CNTL = 0x00080000,
            RB_UNKNOWN_8E01 = 0x00000001,
            VPC_DBG_ECO_CNTL = 0x0,
            UCHE_UNKNOWN_0E12 = 0x00000001
        )
    ))
491
# a6xx_base + a6xx_gen1, one CCU; differs from the 615..619 entry in
# tile_align_h and in several magic register values.
add_gpus([
        GPUId(620),
    ], A6xxGPUInfo(
        CHIP.A6XX,
        [a6xx_base, a6xx_gen1],
        num_ccu = 1,
        tile_align_w = 32,
        tile_align_h = 16,
        num_vsc_pipes = 32,
        cs_shared_mem_size = 32 * 1024,
        wave_granularity = 2,
        fibers_per_sp = 128 * 16,
        magic_regs = dict(
            PC_POWER_CNTL = 0,
            TPL1_DBG_ECO_CNTL = 0x01008000,
            GRAS_DBG_ECO_CNTL = 0x0,
            SP_CHICKEN_BITS = 0x00000400,
            UCHE_CLIENT_PF = 0x00000004,
            PC_MODE_CNTL = 0x1f,
            SP_DBG_ECO_CNTL = 0x01000000,
            RB_DBG_ECO_CNTL = 0x04100000,
            RB_DBG_ECO_CNTL_blit = 0x04100000,
            HLSQ_DBG_ECO_CNTL = 0x0,
            RB_UNKNOWN_8E01 = 0x0,
            VPC_DBG_ECO_CNTL = 0x02000000,
            UCHE_UNKNOWN_0E12 = 0x00000001
        )
    ))
520
# FD621 is identified by chip_id only.  It uses a6xx_base + a6xx_gen3,
# with an inline props object applied last that turns lrz_track_quirk back
# off.  UCHE_CLIENT_PF is commented out, so that magic field is not set
# for this entry.
# NOTE(review): the 0xffff prefix looks like the wildcard/fallback form
# used by other 0xffff... chip ids in this file -- confirm.
add_gpus([
        GPUId(chip_id=0xffff06020100, name="FD621"),
    ], A6xxGPUInfo(
        CHIP.A6XX,
        [a6xx_base, a6xx_gen3, A6XXProps(lrz_track_quirk = False)],
        num_ccu = 2,
        tile_align_w = 96,
        tile_align_h = 16,
        num_vsc_pipes = 32,
        cs_shared_mem_size = 32 * 1024,
        wave_granularity = 2,
        fibers_per_sp = 128 * 2 * 16,
        magic_regs = dict(
            PC_POWER_CNTL = 0,
            # this seems to be a chicken bit that fixes cubic filtering:
            TPL1_DBG_ECO_CNTL = 0x01008000,
            GRAS_DBG_ECO_CNTL = 0x0,
            SP_CHICKEN_BITS = 0x00001400,
            # UCHE_CLIENT_PF = 0x00000004,
            PC_MODE_CNTL = 0x1f,
            SP_DBG_ECO_CNTL = 0x03000000,
            RB_DBG_ECO_CNTL = 0x04100000,
            RB_DBG_ECO_CNTL_blit = 0x04100000,
            HLSQ_DBG_ECO_CNTL = 0x0,
            RB_UNKNOWN_8E01 = 0x0,
            VPC_DBG_ECO_CNTL = 0x02000000,
            UCHE_UNKNOWN_0E12 = 0x00000001
        )
    ))
550
# a6xx_base + a6xx_gen1, two CCUs.
add_gpus([
        GPUId(630),
    ], A6xxGPUInfo(
        CHIP.A6XX,
        [a6xx_base, a6xx_gen1],
        num_ccu = 2,
        tile_align_w = 32,
        tile_align_h = 16,
        num_vsc_pipes = 32,
        cs_shared_mem_size = 32 * 1024,
        wave_granularity = 2,
        fibers_per_sp = 128 * 16,
        magic_regs = dict(
            PC_POWER_CNTL = 1,
            TPL1_DBG_ECO_CNTL = 0x00108000,
            GRAS_DBG_ECO_CNTL = 0x00000880,
            SP_CHICKEN_BITS = 0x00001430,
            UCHE_CLIENT_PF = 0x00000004,
            PC_MODE_CNTL = 0x1f,
            SP_DBG_ECO_CNTL = 0x0,
            RB_DBG_ECO_CNTL = 0x04100000,
            # Unlike the other entries here, the blit value differs from
            # RB_DBG_ECO_CNTL on this part
            RB_DBG_ECO_CNTL_blit = 0x05100000,
            HLSQ_DBG_ECO_CNTL = 0x00080000,
            RB_UNKNOWN_8E01 = 0x00000001,
            VPC_DBG_ECO_CNTL = 0x0,
            UCHE_UNKNOWN_0E12 = 0x10000001
        )
    ))
579
# a6xx_base + a6xx_gen2, two CCUs.
add_gpus([
        GPUId(640),
    ], A6xxGPUInfo(
        CHIP.A6XX,
        [a6xx_base, a6xx_gen2],
        num_ccu = 2,
        tile_align_w = 32,
        tile_align_h = 16,
        num_vsc_pipes = 32,
        cs_shared_mem_size = 32 * 1024,
        wave_granularity = 2,
        fibers_per_sp = 128 * 4 * 16,
        magic_regs = dict(
            PC_POWER_CNTL = 1,
            TPL1_DBG_ECO_CNTL = 0x00008000,
            GRAS_DBG_ECO_CNTL = 0x0,
            SP_CHICKEN_BITS = 0x00000420,
            UCHE_CLIENT_PF = 0x00000004,
            PC_MODE_CNTL = 0x1f,
            SP_DBG_ECO_CNTL = 0x0,
            RB_DBG_ECO_CNTL = 0x04100000,
            RB_DBG_ECO_CNTL_blit = 0x04100000,
            HLSQ_DBG_ECO_CNTL = 0x0,
            RB_UNKNOWN_8E01 = 0x00000001,
            VPC_DBG_ECO_CNTL = 0x02000000,
            UCHE_UNKNOWN_0E12 = 0x00000001
        )
    ))
608
# a6xx_base + a6xx_gen2, four CCUs.
add_gpus([
        GPUId(680),
    ], A6xxGPUInfo(
        CHIP.A6XX,
        [a6xx_base, a6xx_gen2],
        num_ccu = 4,
        tile_align_w = 64,
        tile_align_h = 32,
        num_vsc_pipes = 32,
        cs_shared_mem_size = 32 * 1024,
        wave_granularity = 2,
        fibers_per_sp = 128 * 4 * 16,
        magic_regs = dict(
            PC_POWER_CNTL = 3,
            TPL1_DBG_ECO_CNTL = 0x00108000,
            GRAS_DBG_ECO_CNTL = 0x0,
            SP_CHICKEN_BITS = 0x00001430,
            UCHE_CLIENT_PF = 0x00000004,
            PC_MODE_CNTL = 0x1f,
            SP_DBG_ECO_CNTL = 0x0,
            RB_DBG_ECO_CNTL = 0x04100000,
            RB_DBG_ECO_CNTL_blit = 0x04100000,
            HLSQ_DBG_ECO_CNTL = 0x0,
            RB_UNKNOWN_8E01 = 0x00000001,
            VPC_DBG_ECO_CNTL = 0x02000000,
            UCHE_UNKNOWN_0E12 = 0x00000001
        )
    ))
637
# a6xx_base + a6xx_gen3, three CCUs.
add_gpus([
        GPUId(650),
    ], A6xxGPUInfo(
        CHIP.A6XX,
        [a6xx_base, a6xx_gen3],
        num_ccu = 3,
        tile_align_w = 96,
        tile_align_h = 16,
        num_vsc_pipes = 32,
        cs_shared_mem_size = 32 * 1024,
        wave_granularity = 2,
        fibers_per_sp = 128 * 2 * 16,
        magic_regs = dict(
            PC_POWER_CNTL = 2,
            # this seems to be a chicken bit that fixes cubic filtering:
            TPL1_DBG_ECO_CNTL = 0x01008000,
            GRAS_DBG_ECO_CNTL = 0x0,
            SP_CHICKEN_BITS = 0x00001400,
            UCHE_CLIENT_PF = 0x00000004,
            PC_MODE_CNTL = 0x1f,
            SP_DBG_ECO_CNTL = 0x01000000,
            RB_DBG_ECO_CNTL = 0x04100000,
            RB_DBG_ECO_CNTL_blit = 0x04100000,
            HLSQ_DBG_ECO_CNTL = 0x0,
            RB_UNKNOWN_8E01 = 0x0,
            VPC_DBG_ECO_CNTL = 0x02000000,
            UCHE_UNKNOWN_0E12 = 0x00000001
        )
    ))
667
# Parts identified by chip_id only; all of them share the low 32 bits
# 0x06030500 and differ in the bits above.  a6xx_base + a6xx_gen4, two
# CCUs.
add_gpus([
        GPUId(chip_id=0x00be06030500, name="Adreno 8c Gen 3"),
        GPUId(chip_id=0x007506030500, name="Adreno 7c+ Gen 3"),
        GPUId(chip_id=0x006006030500, name="Adreno 7c+ Gen 3 Lite"),
        GPUId(chip_id=0x00ac06030500, name="FD643"), # e.g. QCM6490, Fairphone 5
        # fallback wildcard entry should be last:
        GPUId(chip_id=0xffff06030500, name="Adreno 7c+ Gen 3"),
    ], A6xxGPUInfo(
        CHIP.A6XX,
        [a6xx_base, a6xx_gen4],
        num_ccu = 2,
        tile_align_w = 32,
        tile_align_h = 16,
        num_vsc_pipes = 32,
        cs_shared_mem_size = 32 * 1024,
        wave_granularity = 2,
        fibers_per_sp = 128 * 2 * 16,
        magic_regs = dict(
            PC_POWER_CNTL = 1,
            TPL1_DBG_ECO_CNTL = 0x05008000,
            GRAS_DBG_ECO_CNTL = 0x0,
            SP_CHICKEN_BITS = 0x00001400,
            UCHE_CLIENT_PF = 0x00000084,
            PC_MODE_CNTL = 0x1f,
            SP_DBG_ECO_CNTL = 0x00000006,
            RB_DBG_ECO_CNTL = 0x04100000,
            RB_DBG_ECO_CNTL_blit = 0x04100000,
            HLSQ_DBG_ECO_CNTL = 0x0,
            RB_UNKNOWN_8E01 = 0x0,
            VPC_DBG_ECO_CNTL = 0x02000000,
            UCHE_UNKNOWN_0E12 = 0x00000001
        )
    ))
701
# a6xx_base + a6xx_gen4, three CCUs.
add_gpus([
        GPUId(660),
    ], A6xxGPUInfo(
        CHIP.A6XX,
        [a6xx_base, a6xx_gen4],
        num_ccu = 3,
        tile_align_w = 96,
        tile_align_h = 16,
        num_vsc_pipes = 32,
        cs_shared_mem_size = 32 * 1024,
        wave_granularity = 2,
        fibers_per_sp = 128 * 2 * 16,
        magic_regs = dict(
            PC_POWER_CNTL = 2,
            TPL1_DBG_ECO_CNTL = 0x05008000,
            GRAS_DBG_ECO_CNTL = 0x0,
            SP_CHICKEN_BITS = 0x00001400,
            UCHE_CLIENT_PF = 0x00000084,
            PC_MODE_CNTL = 0x1f,
            SP_DBG_ECO_CNTL = 0x01000000,
            RB_DBG_ECO_CNTL = 0x04100000,
            RB_DBG_ECO_CNTL_blit = 0x04100000,
            HLSQ_DBG_ECO_CNTL = 0x0,
            RB_UNKNOWN_8E01 = 0x0,
            VPC_DBG_ECO_CNTL = 0x02000000,
            UCHE_UNKNOWN_0E12 = 0x00000001
        )
    ))
730
# FD644 is identified by chip_id only.  a6xx_base + a6xx_gen4, three CCUs;
# compared with the 660 entry it differs in fibers_per_sp and
# SP_DBG_ECO_CNTL.
add_gpus([
        GPUId(chip_id=0x6060201, name="FD644"),
    ], A6xxGPUInfo(
        CHIP.A6XX,
        [a6xx_base, a6xx_gen4],
        num_ccu = 3,
        tile_align_w = 96,
        tile_align_h = 16,
        num_vsc_pipes = 32,
        cs_shared_mem_size = 32 * 1024,
        wave_granularity = 2,
        fibers_per_sp = 128 * 4 * 16,
        magic_regs = dict(
            PC_POWER_CNTL = 2,
            TPL1_DBG_ECO_CNTL = 0x05008000,
            GRAS_DBG_ECO_CNTL = 0x0,
            SP_CHICKEN_BITS = 0x00001400,
            UCHE_CLIENT_PF = 0x00000084,
            PC_MODE_CNTL = 0x1f,
            SP_DBG_ECO_CNTL = 0x6,
            RB_DBG_ECO_CNTL = 0x04100000,
            RB_DBG_ECO_CNTL_blit = 0x04100000,
            HLSQ_DBG_ECO_CNTL = 0x0,
            RB_UNKNOWN_8E01 = 0x0,
            VPC_DBG_ECO_CNTL = 0x02000000,
            UCHE_UNKNOWN_0E12 = 0x00000001
        )
    ))
759
# a6xx_base + a6xx_gen4 + a6xx_a690_quirk, eight CCUs.  This is the only
# a6xx entry in this part of the file that passes raw_magic_regs, so it is
# the only one that gets an a6xx.magic_raw field.
add_gpus([
        GPUId(690),
        GPUId(chip_id=0xffff06090000, name="FD690"), # Default no-speedbin fallback
    ], A6xxGPUInfo(
        CHIP.A6XX,
        [a6xx_base, a6xx_gen4, a6xx_a690_quirk],
        num_ccu = 8,
        tile_align_w = 64,
        tile_align_h = 32,
        num_vsc_pipes = 32,
        cs_shared_mem_size = 32 * 1024,
        wave_granularity = 2,
        fibers_per_sp = 128 * 2 * 16,
        magic_regs = dict(
            PC_POWER_CNTL = 7,
            TPL1_DBG_ECO_CNTL = 0x04c00000,
            GRAS_DBG_ECO_CNTL = 0x0,
            SP_CHICKEN_BITS = 0x00001400,
            UCHE_CLIENT_PF = 0x00000084,
            PC_MODE_CNTL = 0x1f,
            SP_DBG_ECO_CNTL = 0x1200000,
            RB_DBG_ECO_CNTL = 0x100000,
            RB_DBG_ECO_CNTL_blit = 0x00100000,  # ???
            HLSQ_DBG_ECO_CNTL = 0x0,
            RB_UNKNOWN_8E01 = 0x0,
            VPC_DBG_ECO_CNTL = 0x2000400,
            UCHE_UNKNOWN_0E12 = 0x00000001
        ),
        # [register, value] pairs; the register is converted with int()
        raw_magic_regs = [
            [A6XXRegs.REG_A6XX_SP_UNKNOWN_AAF2, 0x00c00000],
        ],
    ))
792
# Based on a6xx_base + a6xx_gen4
#
# This is an A6XXProps (not A7XXProps), so apply_props() writes these keys
# into the a6xx sub-struct of the GPU info.
a7xx_base = A6XXProps(
        has_gmem_fast_clear = True,
        has_hw_multiview = True,
        has_fs_tex_prefetch = True,
        has_sampler_minmax = True,

        supports_double_threadsize = True,

        sysmem_per_ccu_depth_cache_size = 256 * 1024,
        sysmem_per_ccu_color_cache_size = 64 * 1024,
        gmem_ccu_color_cache_fraction = CCUColorCacheFraction.EIGHTH.value,

        prim_alloc_threshold = 0x7,
        vs_max_inputs_count = 32,
        max_sets = 8,

        reg_size_vec4 = 96,
        # Blob limits it to 128 but we hang with 128
        instr_cache_size = 127,
        supports_multiview_mask = True,
        has_z24uint_s8uint = True,
        tess_use_shared = True,
        storage_16bit = True,
        has_tex_filter_cubic = True,
        has_separate_chroma_filter = True,
        has_sample_locations = True,
        has_lpac = True,
        has_shading_rate = True,
        has_getfiberid = True,
        has_dp2acc = True,
        has_dp4acc = True,
        enable_lrz_fast_clear = True,
        has_lrz_dir_tracking = True,
        has_lrz_feedback = True,
        has_per_view_viewport = True,
        line_width_min = 1.0,
        line_width_max = 127.5,
        has_scalar_alu = True,
        has_coherent_ubwc_flag_caches = True,
        has_isam_v = True,
        has_ssbo_imm_offsets = True,
        has_early_preamble = True,
    )
837
# A7XXProps templates are written into the a7xx sub-struct of the GPU info.
# a7xx_725 is a7xx_730 plus cmdbuf_start_a725_quirk.
a7xx_725 = A7XXProps(
        cmdbuf_start_a725_quirk = True,
        supports_ibo_ubwc = True,
        fs_must_have_non_zero_constlen_quirk = True,
        enable_tp_ubwc_flag_hint = True,
    )
844
# Properties for the a7xx sub-struct; the smallest of the a7xx templates.
a7xx_730 = A7XXProps(
        supports_ibo_ubwc = True,
        fs_must_have_non_zero_constlen_quirk = True,
        enable_tp_ubwc_flag_hint = True,
    )
850
# Properties for the a7xx sub-struct; same keys as a7xx_740 but with
# enable_tp_ubwc_flag_hint switched on.
a7xx_735 = A7XXProps(
        stsc_duplication_quirk = True,
        has_event_write_sample_count = True,
        ubwc_unorm_snorm_int_compatible = True,
        supports_ibo_ubwc = True,
        fs_must_have_non_zero_constlen_quirk = True,
        enable_tp_ubwc_flag_hint = True,
    )
859
# Properties for the a7xx sub-struct.
a7xx_740 = A7XXProps(
        stsc_duplication_quirk = True,
        has_event_write_sample_count = True,
        ubwc_unorm_snorm_int_compatible = True,
        supports_ibo_ubwc = True,
        fs_must_have_non_zero_constlen_quirk = True,
        # Most devices with a740 have blob v6xx which doesn't have
        # this hint set. Match them for better compatibility by default.
        enable_tp_ubwc_flag_hint = False,
    )
870
# Properties for the a7xx sub-struct; a7xx_740 plus
# cmdbuf_start_a725_quirk.
a7xx_740_a32 = A7XXProps(
        cmdbuf_start_a725_quirk = True,
        stsc_duplication_quirk = True,
        has_event_write_sample_count = True,
        ubwc_unorm_snorm_int_compatible = True,
        supports_ibo_ubwc = True,
        fs_must_have_non_zero_constlen_quirk = True,
        enable_tp_ubwc_flag_hint = False,
    )
880
# Properties for the a7xx sub-struct.  Unlike the other a7xx templates
# above, this one sets neither fs_must_have_non_zero_constlen_quirk nor
# enable_tp_ubwc_flag_hint.
a7xx_750 = A7XXProps(
        has_event_write_sample_count = True,
        load_inline_uniforms_via_preamble_ldgk = True,
        load_shader_consts_via_preamble = True,
        has_gmem_vpc_attr_buf = True,
        sysmem_vpc_attr_buf_size = 0x20000,
        gmem_vpc_attr_buf_size = 0xc000,
        ubwc_unorm_snorm_int_compatible = True,
        supports_ibo_ubwc = True,
        has_generic_clear = True,
        gs_vpc_adjacency_quirk = True,
        storage_8bit = True,
        ubwc_all_formats_compatible = True,
        has_compliant_dp4acc = True,
        ubwc_coherency_quirk = True,
    )
897
# Named magic register values kept in a variable rather than inline.
# Compared with the inline a6xx magic_regs dicts above, this one has no
# PC_POWER_CNTL or HLSQ_DBG_ECO_CNTL key and adds RB_UNKNOWN_8E06.
# NOTE(review): presumably passed as magic_regs= by a7xx entries further
# down the file -- confirm against the users of this name.
a730_magic_regs = dict(
        TPL1_DBG_ECO_CNTL = 0x1000000,
        GRAS_DBG_ECO_CNTL = 0x800,
        SP_CHICKEN_BITS = 0x1440,
        UCHE_CLIENT_PF = 0x00000084,
        PC_MODE_CNTL = 0x0000003f, # 0x00001f1f in some tests
        SP_DBG_ECO_CNTL = 0x10000000,
        RB_DBG_ECO_CNTL = 0x00000000,
        RB_DBG_ECO_CNTL_blit = 0x00000000,  # is it even needed?
        RB_UNKNOWN_8E01 = 0x0,
        VPC_DBG_ECO_CNTL = 0x02000000,
        UCHE_UNKNOWN_0E12 = 0x3200000,

        RB_UNKNOWN_8E06 = 0x02080000,
    )
913
# List of [register, value] pairs, the same shape A6xxGPUInfo accepts for
# its raw_magic_regs argument (which converts the register with int()).
# The "+1" entries address the register immediately after the named one.
# NOTE(review): presumably passed as raw_magic_regs= by a7xx entries
# further down the file -- confirm against the users of this name.
a730_raw_magic_regs = [
        [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00840004],
        [A6XXRegs.REG_A6XX_TPL1_DBG_ECO_CNTL1, 0x00040724],

        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00002400],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00000000],
        [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000],
        [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000040],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00008000],
        [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x20080000],
        [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x21fc7f00],
        [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000],

        [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
        [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],

        [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2,   0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4,   0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4+1, 0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6,   0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000],

        [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000],

        [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79,   0x00000000],
        [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899,   0x00000000],
        [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5,   0x00000000],

        # Shading rate group
        [A6XXRegs.REG_A6XX_RB_UNKNOWN_88F4,   0x00000000],
        [A6XXRegs.REG_A7XX_HLSQ_UNKNOWN_A9AD, 0x00000000],
        [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80F4, 0x00000000],
    ]
957
# FD725: both chip ids map to one A6xxGPUInfo built from a7xx_base +
# a7xx_725 and the shared a730 magic register tables.
add_gpus(
    [
        # These are named as Adreno730v3 or Adreno725v1.
        GPUId(chip_id=0x07030002, name="FD725"),
        GPUId(chip_id=0xffff07030002, name="FD725"),
    ],
    A6xxGPUInfo(
        CHIP.A7XX,
        [a7xx_base, a7xx_725],
        num_ccu=4,
        tile_align_w=64,
        tile_align_h=32,
        num_vsc_pipes=32,
        cs_shared_mem_size=32 * 1024,
        wave_granularity=2,
        fibers_per_sp=128 * 2 * 16,
        magic_regs=a730_magic_regs,
        raw_magic_regs=a730_raw_magic_regs,
    ),
)
975
# FD730: both chip ids map to one A6xxGPUInfo built from a7xx_base +
# a7xx_730 and the shared a730 magic register tables.
add_gpus(
    [
        # KGSL, no speedbin data
        GPUId(chip_id=0x07030001, name="FD730"),
        # Default no-speedbin fallback
        GPUId(chip_id=0xffff07030001, name="FD730"),
    ],
    A6xxGPUInfo(
        CHIP.A7XX,
        [a7xx_base, a7xx_730],
        num_ccu=4,
        tile_align_w=64,
        tile_align_h=32,
        num_vsc_pipes=32,
        cs_shared_mem_size=32 * 1024,
        wave_granularity=2,
        fibers_per_sp=128 * 2 * 16,
        magic_regs=a730_magic_regs,
        raw_magic_regs=a730_raw_magic_regs,
    ),
)
992
# FD735: a single chip id mapped to an A6xxGPUInfo built from a7xx_base +
# a7xx_735 with its own magic register tables.
add_gpus(
    [
        GPUId(chip_id=0x43030B00, name="FD735"),
    ],
    A6xxGPUInfo(
        CHIP.A7XX,
        [a7xx_base, a7xx_735],
        num_ccu=3,
        tile_align_w=96,
        tile_align_h=32,
        num_vsc_pipes=32,
        cs_shared_mem_size=32 * 1024,
        wave_granularity=2,
        fibers_per_sp=128 * 2 * 16,
        magic_regs={
            "TPL1_DBG_ECO_CNTL": 0x11100000,
            "GRAS_DBG_ECO_CNTL": 0x00004800,
            "SP_CHICKEN_BITS": 0x10001400,
            "UCHE_CLIENT_PF": 0x00000084,
            "PC_MODE_CNTL": 0x0000001f,
            "SP_DBG_ECO_CNTL": 0x10000000,
            "RB_DBG_ECO_CNTL": 0x00000001,
            "RB_DBG_ECO_CNTL_blit": 0x00000001,  # is it even needed?
            "RB_UNKNOWN_8E01": 0x0,
            "VPC_DBG_ECO_CNTL": 0x02000000,
            "UCHE_UNKNOWN_0E12": 0x00000000,

            "RB_UNKNOWN_8E06": 0x02080000,
        },
        raw_magic_regs=[
            [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00000000],
            [A6XXRegs.REG_A6XX_TPL1_DBG_ECO_CNTL1, 0x00040724],

            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00000400],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00430800],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00000000],
            [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000],
            [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00000000],
            [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x00100000],
            [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x01585600],
            [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00008000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000],

            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],

            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8009, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800A, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 0x00000000],

            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2 + 1, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4 + 1, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6 + 1, 0x00000000],

            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000],

            [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000],
            [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
            [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000],
            [A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000],

            # Shading rate group
            [A6XXRegs.REG_A6XX_RB_UNKNOWN_88F4, 0x00000000],
            [A6XXRegs.REG_A7XX_HLSQ_UNKNOWN_A9AD, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8008, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80F4, 0x00000000],
        ],
    ),
)
1071
# FD740 family (including Adreno X1-85): every id listed here maps to one
# A6xxGPUInfo built from a7xx_base + a7xx_740.
add_gpus(
    [
        GPUId(740),  # Deprecated, used for dev kernels.
        # KGSL, no speedbin data
        GPUId(chip_id=0x43050a01, name="FD740"),
        # Default no-speedbin fallback
        GPUId(chip_id=0xffff43050a01, name="FD740"),
        GPUId(chip_id=0x43050B00, name="FD740"),  # Quest 3
        GPUId(chip_id=0xffff43050B00, name="FD740"),
        GPUId(chip_id=0xffff43050c01, name="Adreno X1-85"),
    ],
    A6xxGPUInfo(
        CHIP.A7XX,
        [a7xx_base, a7xx_740],
        num_ccu=6,
        tile_align_w=96,
        tile_align_h=32,
        num_vsc_pipes=32,
        cs_shared_mem_size=32 * 1024,
        wave_granularity=2,
        fibers_per_sp=128 * 2 * 16,
        magic_regs={
            # PC_POWER_CNTL = 7,
            "TPL1_DBG_ECO_CNTL": 0x11100000,
            "GRAS_DBG_ECO_CNTL": 0x00004800,
            "SP_CHICKEN_BITS": 0x10001400,
            "UCHE_CLIENT_PF": 0x00000084,
            # Blob uses 0x1f or 0x1f1f, however these values cause vertices
            # corruption in some tests.
            "PC_MODE_CNTL": 0x0000003f,
            "SP_DBG_ECO_CNTL": 0x10000000,
            "RB_DBG_ECO_CNTL": 0x00000000,
            "RB_DBG_ECO_CNTL_blit": 0x00000000,  # is it even needed?
            # HLSQ_DBG_ECO_CNTL = 0x0,
            "RB_UNKNOWN_8E01": 0x0,
            "VPC_DBG_ECO_CNTL": 0x02000000,
            "UCHE_UNKNOWN_0E12": 0x00000000,

            "RB_UNKNOWN_8E06": 0x02080000,
        },
        raw_magic_regs=[
            [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00040004],
            [A6XXRegs.REG_A6XX_TPL1_DBG_ECO_CNTL1, 0x00040724],

            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00000400],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00430800],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00000000],
            [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000],
            [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00000000],
            [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x00100000],
            [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x21585600],
            [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00008000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000],

            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],

            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8009, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800A, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 0x00000000],

            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2 + 1, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4 + 1, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6 + 1, 0x00000000],

            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000],

            [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000],
            [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
            [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000],
            [A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000],

            # Shading rate group
            [A6XXRegs.REG_A6XX_RB_UNKNOWN_88F4, 0x00000000],
            [A6XXRegs.REG_A7XX_HLSQ_UNKNOWN_A9AD, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8008, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80F4, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80F5, 0x00000000],
        ],
    ),
)
1160
# FDA32 (Adreno A32): both chip ids map to one A6xxGPUInfo built from
# a7xx_base + a7xx_740_a32.
# Values from blob v676.0
add_gpus(
    [
        # Adreno A32 (G3x Gen 2)
        GPUId(chip_id=0x43050a00, name="FDA32"),
        GPUId(chip_id=0xffff43050a00, name="FDA32"),
    ],
    A6xxGPUInfo(
        CHIP.A7XX,
        [a7xx_base, a7xx_740_a32],
        num_ccu=6,
        tile_align_w=96,
        tile_align_h=32,
        num_vsc_pipes=32,
        cs_shared_mem_size=32 * 1024,
        wave_granularity=2,
        fibers_per_sp=128 * 2 * 16,
        magic_regs={
            # PC_POWER_CNTL = 7,
            "TPL1_DBG_ECO_CNTL": 0x11100000,
            "GRAS_DBG_ECO_CNTL": 0x00004800,
            "SP_CHICKEN_BITS": 0x10001400,
            "UCHE_CLIENT_PF": 0x00000084,
            # Blob uses 0x1f or 0x1f1f, however these values cause vertices
            # corruption in some tests.
            "PC_MODE_CNTL": 0x0000003f,
            "SP_DBG_ECO_CNTL": 0x10000000,
            "RB_DBG_ECO_CNTL": 0x00000000,
            "RB_DBG_ECO_CNTL_blit": 0x00000000,  # is it even needed?
            # HLSQ_DBG_ECO_CNTL = 0x0,
            "RB_UNKNOWN_8E01": 0x00000000,
            "VPC_DBG_ECO_CNTL": 0x02000000,
            "UCHE_UNKNOWN_0E12": 0x00000000,

            "RB_UNKNOWN_8E06": 0x02080000,
        },
        raw_magic_regs=[
            [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00040004],
            [A6XXRegs.REG_A6XX_TPL1_DBG_ECO_CNTL1, 0x00000700],

            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00000400],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00430820],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00000000],
            [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000],
            [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000080],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00000000],
            [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x00100000],
            [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x21585600],
            [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00008000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000],

            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],

            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8009, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800A, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 0x00000000],

            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2 + 1, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4 + 1, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6 + 1, 0x00000000],

            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000],

            [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000],
            [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
            [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000],

            # Shading rate group
            [A6XXRegs.REG_A6XX_RB_UNKNOWN_88F4, 0x00000000],
            [A6XXRegs.REG_A7XX_HLSQ_UNKNOWN_A9AD, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80F4, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80F5, 0x00000000],
        ],
    ),
)
1244
# FD750: both chip ids map to one A6xxGPUInfo built from a7xx_base +
# a7xx_750 with its own magic register tables.
add_gpus(
    [
        # KGSL, no speedbin data
        GPUId(chip_id=0x43051401, name="FD750"),
        # Default no-speedbin fallback
        GPUId(chip_id=0xffff43051401, name="FD750"),
    ],
    A6xxGPUInfo(
        CHIP.A7XX,
        [a7xx_base, a7xx_750],
        num_ccu=6,
        tile_align_w=96,
        tile_align_h=32,
        num_vsc_pipes=32,
        cs_shared_mem_size=32 * 1024,
        wave_granularity=2,
        fibers_per_sp=128 * 2 * 16,
        magic_regs={
            "TPL1_DBG_ECO_CNTL": 0x11100000,
            "GRAS_DBG_ECO_CNTL": 0x00004800,
            "SP_CHICKEN_BITS": 0x10000400,
            "PC_MODE_CNTL": 0x00003f1f,
            "SP_DBG_ECO_CNTL": 0x10000000,
            "RB_DBG_ECO_CNTL": 0x00000001,
            "RB_DBG_ECO_CNTL_blit": 0x00000001,
            "RB_UNKNOWN_8E01": 0x0,
            "VPC_DBG_ECO_CNTL": 0x02000000,
            "UCHE_UNKNOWN_0E12": 0x40000000,

            "RB_UNKNOWN_8E06": 0x02082000,
        },
        raw_magic_regs=[
            [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00000000],
            [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000],
            [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000080],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00431800],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00800000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00000000],
            [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x00100000],
            [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x01585600],
            [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00008000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8009, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800A, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 0x00000000],

            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2 + 1, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4 + 1, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6 + 1, 0x00000000],

            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000],

            [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
            [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000],
            [A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000],

            # Shading rate group
            [A6XXRegs.REG_A6XX_RB_UNKNOWN_88F4, 0x00000000],
            [A6XXRegs.REG_A7XX_HLSQ_UNKNOWN_A9AD, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8008, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80F4, 0x00000000],

            # Registers given by raw offset rather than an A6XXRegs name.
            [0x930a, 0],
            [0x960a, 1],
            [A6XXRegs.REG_A7XX_SP_PS_ALIASED_COMPONENTS_CONTROL, 0],
            [A6XXRegs.REG_A7XX_SP_PS_ALIASED_COMPONENTS, 0],
        ],
    ),
)
1323
# Mako template for the generated C source.  It emits:
#   * one `fd_dev_info` struct per entry of `s.gpu_infos`,
#   * the `fd_dev_recs` table mapping every registered GPU id to its struct,
#   * `fd_dev_info_apply_dbg_options()`, which overrides individual
#     properties from the FD_DEV_FEATURES environment variable (one strcmp
#     branch is generated per entry of `unique_props`).
#
# The generated C calls strdup/strtok_r/strcmp, so <string.h> is included
# explicitly instead of relying on another header pulling it in, and the
# strdup() result is checked before it is handed to strtok_r().
#
# The trailing backslash after the opening quotes keeps the output from
# starting with an empty line.
template = """\
/* Copyright © 2021 Google, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

#include "freedreno_dev_info.h"
#include "util/u_debug.h"
#include "util/log.h"

#include <stdlib.h>
#include <string.h>

/* Map python to C: */
#define True true
#define False false

%for info in s.gpu_infos:
static const struct fd_dev_info __info${s.info_index(info)} = ${str(info)};
%endfor

static const struct fd_dev_rec fd_dev_recs[] = {
%for id, info in s.gpus.items():
   { {${id.gpu_id}, ${hex(id.chip_id)}}, "${id.name}", &__info${s.info_index(info)} },
%endfor
};

/* Apply overrides from the FD_DEV_FEATURES environment variable, which is
 * a colon-separated list of name=value entries.  An unknown name is a
 * fatal error.
 */
void
fd_dev_info_apply_dbg_options(struct fd_dev_info *info)
{
    const char *env = debug_get_option("FD_DEV_FEATURES", NULL);
    if (!env || !*env)
        return;

    char *features = strdup(env);
    if (!features) {
        mesa_loge("Out of memory while parsing FD_DEV_FEATURES");
        return;
    }

    char *feature, *feature_end;
    feature = strtok_r(features, ":", &feature_end);
    while (feature != NULL) {
        char *name, *name_end;
        name = strtok_r(feature, "=", &name_end);

        if (!name) {
            mesa_loge("Invalid feature \\"%s\\" in FD_DEV_FEATURES", feature);
            exit(1);
        }

        char *value = strtok_r(NULL, "=", &name_end);

        feature = strtok_r(NULL, ":", &feature_end);

%for (prop, gen), val in unique_props.items():
  <%
    if isinstance(val, bool):
        parse_value = "debug_parse_bool_option"
    else:
        parse_value = "debug_parse_num_option"
  %>
        if (strcmp(name, "${prop}") == 0) {
            info->${gen}.${prop} = ${parse_value}(value, info->${gen}.${prop});
            continue;
        }
%endfor

        mesa_loge("Invalid feature \\"%s\\" in FD_DEV_FEATURES", name);
        exit(1);
    }

    free(features);
}
"""
1393
# Render the C source from the registered device table and the collected
# per-generation properties, then write it to stdout.
rendered_source = Template(template).render(
    s=s,
    unique_props=A6XXProps.unique_props,
)
print(rendered_source)
1395
1396