#
# Copyright © 2021 Google, Inc.
#
# SPDX-License-Identifier: MIT

from mako.template import Template
import sys
import argparse
from enum import Enum


def max_bitfield_val(high, low, shift):
    """Return the limit value used for a register bitfield.

    Computes ``((1 << (high - low)) - 1) << shift``, i.e. an all-ones
    pattern of ``high - low`` bits moved left by ``shift`` bits.
    """
    return ((1 << (high - low)) - 1) << shift


# The a6xx module is imported from the directory passed via
# -p/--import-path, so sys.path has to be extended before the import.
parser = argparse.ArgumentParser()
parser.add_argument('-p', '--import-path', required=True)
args = parser.parse_args()
sys.path.insert(0, args.import_path)

from a6xx import *


class CHIP(Enum):
    """Adreno generations; GPUInfo stores the member's integer value."""
    A2XX = 2
    A3XX = 3
    A4XX = 4
    A5XX = 5
    A6XX = 6
    A7XX = 7


class CCUColorCacheFraction(Enum):
    """Values used for the gmem_ccu_color_cache_fraction property."""
    FULL = 0
    HALF = 1
    QUARTER = 2
    EIGHTH = 3


class State(object):
    """Registry of device-info structs and of the GPU ids mapped to them."""

    def __init__(self):
        # List of unique device-info structs, multiple different GPU ids
        # can map to a single info struct in cases where the differences
        # are not sw visible, or the only differences are parameters
        # queried from the kernel (like GMEM size)
        self.gpu_infos = []

        # Table mapping GPU id to device-info struct
        self.gpus = {}

    def info_index(self, gpu_info):
        """Return the position of gpu_info within self.gpu_infos.

        Raises:
            ValueError: if gpu_info is not present in self.gpu_infos.
        """
        for index, info in enumerate(self.gpu_infos):
            if gpu_info == info:
                return index
        # Use a real exception type: the bare name `Error` is not defined
        # in this file, so raising it would surface as a NameError instead.
        raise ValueError("invalid info")


s = State()


def add_gpus(ids, info):
    """Map every GPUId in ids to the device-info struct info."""
    for gpu in ids:
        s.gpus[gpu] = info


class GPUId(object):
    """Identifies one GPU by gpu_id and/or chip_id, plus a display name.

    Args:
        gpu_id: three-digit style id (e.g. 630).  Required when chip_id is
            not given; stored as 0 when omitted.
        chip_id: explicit chip id.  When omitted it is derived from gpu_id
            as (core << 24) | (major << 16) | (minor << 8) | 0xff, where
            core, major and minor are the hundreds part, the tens digit and
            the ones digit of gpu_id.
        name: display name.  Defaults to "FD<gpu_id>", which requires a
            non-zero gpu_id.
    """
    def __init__(self, gpu_id = None, chip_id = None, name=None):
        if chip_id is None:
            assert gpu_id is not None
            # Integer arithmetic instead of float division + truncation.
            core, remainder = divmod(gpu_id, 100)
            major, minor = divmod(remainder, 10)
            chip_id = (core << 24) | (major << 16) | (minor << 8) | 0xff
        self.chip_id = chip_id
        if gpu_id is None:
            gpu_id = 0
        self.gpu_id = gpu_id
        if name is None:
            assert gpu_id != 0
            name = "FD%d" % gpu_id
        self.name = name


class Struct(object):
    """A helper class that stringifies itself to a 'C' struct initializer
    """
    def __str__(self):
        # One ".name=value," designator per instance attribute, in the
        # order the attributes were set.
        fields = "".join("." + name + "=" + str(value) + ","
                         for name, value in vars(self).items())
        return "{" + fields + "}"


class GPUInfo(Struct):
    """Base class for any generation of adreno, consists of GMEM layout
       related parameters

       Note that tile_max_h is normally only constrained by corresponding
       bitfield size/shift (ie. VSC_BIN_SIZE, or similar), but tile_max_w
       tends to have lower limits, in which case a comment will describe
       the bitfield size/shift

       Every constructed instance registers itself by appending itself to
       the module-level State's gpu_infos list.
    """
    def __init__(self, chip, gmem_align_w, gmem_align_h,
                 tile_align_w, tile_align_h,
                 tile_max_w, tile_max_h, num_vsc_pipes,
                 cs_shared_mem_size, num_sp_cores, wave_granularity, fibers_per_sp,
                 threadsize_base = 64, max_waves = 16):
        # Store the plain integer rather than the CHIP member.
        self.chip          = chip.value
        self.gmem_align_w  = gmem_align_w
        self.gmem_align_h  = gmem_align_h
        self.tile_align_w  = tile_align_w
        self.tile_align_h  = tile_align_h
        self.tile_max_w    = tile_max_w
        self.tile_max_h    = tile_max_h
        self.num_vsc_pipes = num_vsc_pipes
        self.cs_shared_mem_size = cs_shared_mem_size
        self.num_sp_cores  = num_sp_cores
        self.wave_granularity = wave_granularity
        self.fibers_per_sp = fibers_per_sp
        self.threadsize_base = threadsize_base
        self.max_waves     = max_waves

        s.gpu_infos.append(self)


class A6xxGPUInfo(GPUInfo):
    """The a6xx generation has a lot more parameters, and is broken down
       into distinct sub-generations.  The template parameter avoids
       duplication of parameters that are unique to the sub-generation.

       template may be a single props object or a list of them; each one is
       applied in order through its apply_props() method, so later entries
       override earlier ones for the same property name.
    """
    def __init__(self, chip, template, num_ccu,
                 tile_align_w, tile_align_h, num_vsc_pipes,
                 cs_shared_mem_size, wave_granularity, fibers_per_sp,
                 magic_regs, raw_magic_regs = None, threadsize_base = 64,
                 max_waves = 16):
        if chip == CHIP.A6XX:
            tile_max_w = 1024 # max_bitfield_val(5, 0, 5)
            tile_max_h = max_bitfield_val(14, 8, 4) # 1008
        else:
            tile_max_w = 1728
            tile_max_h = 1728

        super().__init__(chip, gmem_align_w = 16, gmem_align_h = 4,
                         tile_align_w = tile_align_w,
                         tile_align_h = tile_align_h,
                         tile_max_w   = tile_max_w,
                         tile_max_h   = tile_max_h,
                         num_vsc_pipes = num_vsc_pipes,
                         cs_shared_mem_size = cs_shared_mem_size,
                         num_sp_cores = num_ccu, # The # of SP cores seems to always match # of CCU
                         wave_granularity   = wave_granularity,
                         fibers_per_sp      = fibers_per_sp,
                         threadsize_base = threadsize_base,
                         max_waves = max_waves)

        self.num_ccu = num_ccu

        # Both sub-structs always exist; props objects fill in whichever
        # one their apply_props() targets.
        self.a6xx = Struct()
        self.a7xx = Struct()

        self.a6xx.magic = Struct()

        for name, val in magic_regs.items():
            setattr(self.a6xx.magic, name, val)

        # magic_raw is only emitted when raw registers were supplied.
        if raw_magic_regs:
            self.a6xx.magic_raw = [[int(r[0]), r[1]] for r in raw_magic_regs]

        templates = template if isinstance(template, list) else [template]
        for props in templates:
            props.apply_props(self)


    def __str__(self):
        # Python list brackets (from magic_raw) become C brace initializers.
        return super().__str__().replace('[', '{').replace("]", "}")


# a2xx is really two sub-generations, a20x and a22x, but we don't currently
# capture that in the device-info tables
add_gpus([
        GPUId(200),
        GPUId(201),
        GPUId(205),
        GPUId(220),
    ], GPUInfo(
        CHIP.A2XX,
        gmem_align_w = 32,  gmem_align_h = 32,
        tile_align_w = 32,  tile_align_h = 32,
        tile_max_w   = 512,
        tile_max_h   = ~0, # TODO
        num_vsc_pipes = 8,
        cs_shared_mem_size = 0,
        num_sp_cores = 0, # TODO
        wave_granularity = 2,
        fibers_per_sp = 0, # TODO
        threadsize_base = 8, # TODO: Confirm this
    ))

add_gpus([
        GPUId(305),
        GPUId(307),
        GPUId(320),
        GPUId(330),
        GPUId(chip_id=0x03000512, name="FD305B"),
        GPUId(chip_id=0x03000620, name="FD306A"),
    ], GPUInfo(
        CHIP.A3XX,
        gmem_align_w = 32,  gmem_align_h = 32,
        tile_align_w = 32,  tile_align_h = 32,
        tile_max_w   = 992, # max_bitfield_val(4, 0, 5)
        tile_max_h   = max_bitfield_val(9, 5, 5),
        num_vsc_pipes = 8,
        cs_shared_mem_size = 32 * 1024,
        num_sp_cores = 0, # TODO
        wave_granularity = 2,
        fibers_per_sp = 0, # TODO
        threadsize_base = 8,
    ))

add_gpus([
        GPUId(405),
        GPUId(420),
        GPUId(430),
    ], GPUInfo(
        CHIP.A4XX,
        gmem_align_w = 32,  gmem_align_h = 32,
        tile_align_w = 32,  tile_align_h = 32,
        tile_max_w   = 1024, # max_bitfield_val(4, 0, 5)
        tile_max_h   = max_bitfield_val(9, 5, 5),
        num_vsc_pipes = 8,
        cs_shared_mem_size = 32 * 1024,
        num_sp_cores = 0, # TODO
        wave_granularity = 2,
        fibers_per_sp = 0, # TODO
        threadsize_base = 32, # TODO: Confirm this
    ))

add_gpus([
        GPUId(505),
        GPUId(506),
        GPUId(508),
        GPUId(509),
    ], GPUInfo(
        CHIP.A5XX,
        gmem_align_w = 64,  gmem_align_h = 32,
        tile_align_w = 64,  tile_align_h = 32,
        tile_max_w   = 1024, # max_bitfield_val(7, 0, 5)
        tile_max_h   = max_bitfield_val(16, 9, 5),
        num_vsc_pipes = 16,
        cs_shared_mem_size = 32 * 1024,
        num_sp_cores = 1,
        wave_granularity = 2,
        fibers_per_sp = 64 * 16, # Lowest number that didn't fault on spillall fs-varying-array-mat4-col-row-rd.
        threadsize_base = 32,
    ))

add_gpus([
        GPUId(510),
        GPUId(512),
    ], GPUInfo(
        CHIP.A5XX,
        gmem_align_w = 64,  gmem_align_h = 32,
        tile_align_w = 64,  tile_align_h = 32,
        tile_max_w   = 1024, # max_bitfield_val(7, 0, 5)
        tile_max_h   = max_bitfield_val(16, 9, 5),
        num_vsc_pipes = 16,
        cs_shared_mem_size = 32 * 1024,
        num_sp_cores = 2,
        wave_granularity = 2,
        fibers_per_sp = 64 * 16, # Lowest number that didn't fault on spillall fs-varying-array-mat4-col-row-rd.
        threadsize_base = 32,
    ))

add_gpus([
        GPUId(530),
        GPUId(540),
    ], GPUInfo(
        CHIP.A5XX,
        gmem_align_w = 64,  gmem_align_h = 32,
        tile_align_w = 64,  tile_align_h = 32,
        tile_max_w   = 1024, # max_bitfield_val(7, 0, 5)
        tile_max_h   = max_bitfield_val(16, 9, 5),
        num_vsc_pipes = 16,
        cs_shared_mem_size = 32 * 1024,
        num_sp_cores = 4,
        wave_granularity = 2,
        fibers_per_sp = 64 * 16, # Lowest number that didn't fault on spillall fs-varying-array-mat4-col-row-rd.
        threadsize_base = 32,
    ))


class A6XXProps(dict):
    """A dict of property name -> value that can be applied onto the
       'a6xx' sub-struct of a GPU info object.
    """
    # Shared by every instance (and by subclasses, which write to it through
    # the A6XXProps name): records the last value applied for each
    # (property name, generation) pair.
    unique_props = dict()

    def apply_gen_props(self, gen, gpu_info):
        """Set each property as an attribute of getattr(gpu_info, gen)."""
        for name, val in self.items():
            setattr(getattr(gpu_info, gen), name, val)
            A6XXProps.unique_props[(name, gen)] = val

    def apply_props(self, gpu_info):
        """Apply the properties to gpu_info.a6xx."""
        self.apply_gen_props("a6xx", gpu_info)


class A7XXProps(A6XXProps):
    """Same as A6XXProps, but the properties land in gpu_info.a7xx."""
    def apply_props(self, gpu_info):
        self.apply_gen_props("a7xx", gpu_info)


# Props could be modified with env var:
#  FD_DEV_FEATURES=%feature_name%=%value%:%feature_name%=%value%:...
# e.g.
309# FD_DEV_FEATURES=has_fs_tex_prefetch=0:max_sets=4 310 311a6xx_base = A6XXProps( 312 has_cp_reg_write = True, 313 has_8bpp_ubwc = True, 314 has_gmem_fast_clear = True, 315 has_hw_multiview = True, 316 has_fs_tex_prefetch = True, 317 has_sampler_minmax = True, 318 319 supports_double_threadsize = True, 320 321 sysmem_per_ccu_depth_cache_size = 64 * 1024, 322 sysmem_per_ccu_color_cache_size = 64 * 1024, 323 gmem_ccu_color_cache_fraction = CCUColorCacheFraction.QUARTER.value, 324 325 prim_alloc_threshold = 0x7, 326 vs_max_inputs_count = 32, 327 max_sets = 5, 328 line_width_min = 1.0, 329 line_width_max = 1.0, 330 ) 331 332 333# a6xx can be divided into distinct sub-generations, where certain device- 334# info parameters are keyed to the sub-generation. These templates reduce 335# the copypaste 336 337a6xx_gen1_low = A6XXProps( 338 reg_size_vec4 = 48, 339 instr_cache_size = 64, 340 indirect_draw_wfm_quirk = True, 341 depth_bounds_require_depth_test_quirk = True, 342 343 has_gmem_fast_clear = False, 344 has_hw_multiview = False, 345 has_sampler_minmax = False, 346 has_fs_tex_prefetch = False, 347 sysmem_per_ccu_color_cache_size = 8 * 1024, 348 sysmem_per_ccu_depth_cache_size = 8 * 1024, 349 gmem_ccu_color_cache_fraction = CCUColorCacheFraction.HALF.value, 350 vs_max_inputs_count = 16, 351 supports_double_threadsize = False, 352 ) 353 354a6xx_gen1 = A6XXProps( 355 reg_size_vec4 = 96, 356 instr_cache_size = 64, 357 indirect_draw_wfm_quirk = True, 358 depth_bounds_require_depth_test_quirk = True, 359 ) 360 361a6xx_gen2 = A6XXProps( 362 reg_size_vec4 = 96, 363 instr_cache_size = 64, # TODO 364 supports_multiview_mask = True, 365 has_z24uint_s8uint = True, 366 indirect_draw_wfm_quirk = True, 367 depth_bounds_require_depth_test_quirk = True, # TODO: check if true 368 has_dp2acc = False, # TODO: check if true 369 has_8bpp_ubwc = False, 370 ) 371 372a6xx_gen3 = A6XXProps( 373 reg_size_vec4 = 64, 374 # Blob limits it to 128 but we hang with 128 375 instr_cache_size = 127, 376 
supports_multiview_mask = True, 377 has_z24uint_s8uint = True, 378 tess_use_shared = True, 379 storage_16bit = True, 380 has_tex_filter_cubic = True, 381 has_separate_chroma_filter = True, 382 has_sample_locations = True, 383 has_8bpp_ubwc = False, 384 has_dp2acc = True, 385 has_lrz_dir_tracking = True, 386 enable_lrz_fast_clear = True, 387 lrz_track_quirk = True, 388 has_lrz_feedback = True, 389 has_per_view_viewport = True, 390 has_scalar_alu = True, 391 has_early_preamble = True, 392 ) 393 394a6xx_gen4 = A6XXProps( 395 reg_size_vec4 = 64, 396 # Blob limits it to 128 but we hang with 128 397 instr_cache_size = 127, 398 supports_multiview_mask = True, 399 has_z24uint_s8uint = True, 400 tess_use_shared = True, 401 storage_16bit = True, 402 has_tex_filter_cubic = True, 403 has_separate_chroma_filter = True, 404 has_sample_locations = True, 405 has_cp_reg_write = False, 406 has_8bpp_ubwc = False, 407 has_lpac = True, 408 has_shading_rate = True, 409 has_getfiberid = True, 410 has_dp2acc = True, 411 has_dp4acc = True, 412 enable_lrz_fast_clear = True, 413 has_lrz_dir_tracking = True, 414 has_lrz_feedback = True, 415 has_per_view_viewport = True, 416 has_scalar_alu = True, 417 has_isam_v = True, 418 has_ssbo_imm_offsets = True, 419 # TODO: there seems to be a quirk where at least rcp can't be in an 420 # early preamble. a660 at least is affected. 
421 #has_early_preamble = True, 422 ) 423 424a6xx_a690_quirk = A6XXProps( 425 broken_ds_ubwc_quirk = True, 426 ) 427 428add_gpus([ 429 GPUId(605), # TODO: Test it, based only on libwrapfake dumps 430 GPUId(608), # TODO: Test it, based only on libwrapfake dumps 431 GPUId(610), 432 GPUId(612), # TODO: Test it, based only on libwrapfake dumps 433 ], A6xxGPUInfo( 434 CHIP.A6XX, 435 [a6xx_base, a6xx_gen1_low], 436 num_ccu = 1, 437 tile_align_w = 32, 438 tile_align_h = 16, 439 num_vsc_pipes = 16, 440 cs_shared_mem_size = 16 * 1024, 441 wave_granularity = 1, 442 fibers_per_sp = 128 * 16, 443 magic_regs = dict( 444 PC_POWER_CNTL = 0, 445 TPL1_DBG_ECO_CNTL = 0, 446 GRAS_DBG_ECO_CNTL = 0, 447 SP_CHICKEN_BITS = 0, 448 UCHE_CLIENT_PF = 0x00000004, 449 PC_MODE_CNTL = 0xf, 450 SP_DBG_ECO_CNTL = 0x0, 451 RB_DBG_ECO_CNTL = 0x04100000, 452 RB_DBG_ECO_CNTL_blit = 0x04100000, 453 HLSQ_DBG_ECO_CNTL = 0, 454 RB_UNKNOWN_8E01 = 0x00000001, 455 VPC_DBG_ECO_CNTL = 0x0, 456 UCHE_UNKNOWN_0E12 = 0x10000000, 457 ), 458 )) 459 460add_gpus([ 461 GPUId(615), 462 GPUId(616), 463 GPUId(618), 464 GPUId(619), 465 ], A6xxGPUInfo( 466 CHIP.A6XX, 467 [a6xx_base, a6xx_gen1], 468 num_ccu = 1, 469 tile_align_w = 32, 470 tile_align_h = 32, 471 num_vsc_pipes = 32, 472 cs_shared_mem_size = 32 * 1024, 473 wave_granularity = 2, 474 fibers_per_sp = 128 * 16, 475 magic_regs = dict( 476 PC_POWER_CNTL = 0, 477 TPL1_DBG_ECO_CNTL = 0x00108000, 478 GRAS_DBG_ECO_CNTL = 0x00000880, 479 SP_CHICKEN_BITS = 0x00000430, 480 UCHE_CLIENT_PF = 0x00000004, 481 PC_MODE_CNTL = 0x1f, 482 SP_DBG_ECO_CNTL = 0x0, 483 RB_DBG_ECO_CNTL = 0x04100000, 484 RB_DBG_ECO_CNTL_blit = 0x04100000, 485 HLSQ_DBG_ECO_CNTL = 0x00080000, 486 RB_UNKNOWN_8E01 = 0x00000001, 487 VPC_DBG_ECO_CNTL = 0x0, 488 UCHE_UNKNOWN_0E12 = 0x00000001 489 ) 490 )) 491 492add_gpus([ 493 GPUId(620), 494 ], A6xxGPUInfo( 495 CHIP.A6XX, 496 [a6xx_base, a6xx_gen1], 497 num_ccu = 1, 498 tile_align_w = 32, 499 tile_align_h = 16, 500 num_vsc_pipes = 32, 501 cs_shared_mem_size = 
32 * 1024, 502 wave_granularity = 2, 503 fibers_per_sp = 128 * 16, 504 magic_regs = dict( 505 PC_POWER_CNTL = 0, 506 TPL1_DBG_ECO_CNTL = 0x01008000, 507 GRAS_DBG_ECO_CNTL = 0x0, 508 SP_CHICKEN_BITS = 0x00000400, 509 UCHE_CLIENT_PF = 0x00000004, 510 PC_MODE_CNTL = 0x1f, 511 SP_DBG_ECO_CNTL = 0x01000000, 512 RB_DBG_ECO_CNTL = 0x04100000, 513 RB_DBG_ECO_CNTL_blit = 0x04100000, 514 HLSQ_DBG_ECO_CNTL = 0x0, 515 RB_UNKNOWN_8E01 = 0x0, 516 VPC_DBG_ECO_CNTL = 0x02000000, 517 UCHE_UNKNOWN_0E12 = 0x00000001 518 ) 519 )) 520 521add_gpus([ 522 GPUId(chip_id=0xffff06020100, name="FD621"), 523 ], A6xxGPUInfo( 524 CHIP.A6XX, 525 [a6xx_base, a6xx_gen3, A6XXProps(lrz_track_quirk = False)], 526 num_ccu = 2, 527 tile_align_w = 96, 528 tile_align_h = 16, 529 num_vsc_pipes = 32, 530 cs_shared_mem_size = 32 * 1024, 531 wave_granularity = 2, 532 fibers_per_sp = 128 * 2 * 16, 533 magic_regs = dict( 534 PC_POWER_CNTL = 0, 535 # this seems to be a chicken bit that fixes cubic filtering: 536 TPL1_DBG_ECO_CNTL = 0x01008000, 537 GRAS_DBG_ECO_CNTL = 0x0, 538 SP_CHICKEN_BITS = 0x00001400, 539 # UCHE_CLIENT_PF = 0x00000004, 540 PC_MODE_CNTL = 0x1f, 541 SP_DBG_ECO_CNTL = 0x03000000, 542 RB_DBG_ECO_CNTL = 0x04100000, 543 RB_DBG_ECO_CNTL_blit = 0x04100000, 544 HLSQ_DBG_ECO_CNTL = 0x0, 545 RB_UNKNOWN_8E01 = 0x0, 546 VPC_DBG_ECO_CNTL = 0x02000000, 547 UCHE_UNKNOWN_0E12 = 0x00000001 548 ) 549 )) 550 551add_gpus([ 552 GPUId(630), 553 ], A6xxGPUInfo( 554 CHIP.A6XX, 555 [a6xx_base, a6xx_gen1], 556 num_ccu = 2, 557 tile_align_w = 32, 558 tile_align_h = 16, 559 num_vsc_pipes = 32, 560 cs_shared_mem_size = 32 * 1024, 561 wave_granularity = 2, 562 fibers_per_sp = 128 * 16, 563 magic_regs = dict( 564 PC_POWER_CNTL = 1, 565 TPL1_DBG_ECO_CNTL = 0x00108000, 566 GRAS_DBG_ECO_CNTL = 0x00000880, 567 SP_CHICKEN_BITS = 0x00001430, 568 UCHE_CLIENT_PF = 0x00000004, 569 PC_MODE_CNTL = 0x1f, 570 SP_DBG_ECO_CNTL = 0x0, 571 RB_DBG_ECO_CNTL = 0x04100000, 572 RB_DBG_ECO_CNTL_blit = 0x05100000, 573 HLSQ_DBG_ECO_CNTL = 
0x00080000, 574 RB_UNKNOWN_8E01 = 0x00000001, 575 VPC_DBG_ECO_CNTL = 0x0, 576 UCHE_UNKNOWN_0E12 = 0x10000001 577 ) 578 )) 579 580add_gpus([ 581 GPUId(640), 582 ], A6xxGPUInfo( 583 CHIP.A6XX, 584 [a6xx_base, a6xx_gen2], 585 num_ccu = 2, 586 tile_align_w = 32, 587 tile_align_h = 16, 588 num_vsc_pipes = 32, 589 cs_shared_mem_size = 32 * 1024, 590 wave_granularity = 2, 591 fibers_per_sp = 128 * 4 * 16, 592 magic_regs = dict( 593 PC_POWER_CNTL = 1, 594 TPL1_DBG_ECO_CNTL = 0x00008000, 595 GRAS_DBG_ECO_CNTL = 0x0, 596 SP_CHICKEN_BITS = 0x00000420, 597 UCHE_CLIENT_PF = 0x00000004, 598 PC_MODE_CNTL = 0x1f, 599 SP_DBG_ECO_CNTL = 0x0, 600 RB_DBG_ECO_CNTL = 0x04100000, 601 RB_DBG_ECO_CNTL_blit = 0x04100000, 602 HLSQ_DBG_ECO_CNTL = 0x0, 603 RB_UNKNOWN_8E01 = 0x00000001, 604 VPC_DBG_ECO_CNTL = 0x02000000, 605 UCHE_UNKNOWN_0E12 = 0x00000001 606 ) 607 )) 608 609add_gpus([ 610 GPUId(680), 611 ], A6xxGPUInfo( 612 CHIP.A6XX, 613 [a6xx_base, a6xx_gen2], 614 num_ccu = 4, 615 tile_align_w = 64, 616 tile_align_h = 32, 617 num_vsc_pipes = 32, 618 cs_shared_mem_size = 32 * 1024, 619 wave_granularity = 2, 620 fibers_per_sp = 128 * 4 * 16, 621 magic_regs = dict( 622 PC_POWER_CNTL = 3, 623 TPL1_DBG_ECO_CNTL = 0x00108000, 624 GRAS_DBG_ECO_CNTL = 0x0, 625 SP_CHICKEN_BITS = 0x00001430, 626 UCHE_CLIENT_PF = 0x00000004, 627 PC_MODE_CNTL = 0x1f, 628 SP_DBG_ECO_CNTL = 0x0, 629 RB_DBG_ECO_CNTL = 0x04100000, 630 RB_DBG_ECO_CNTL_blit = 0x04100000, 631 HLSQ_DBG_ECO_CNTL = 0x0, 632 RB_UNKNOWN_8E01 = 0x00000001, 633 VPC_DBG_ECO_CNTL = 0x02000000, 634 UCHE_UNKNOWN_0E12 = 0x00000001 635 ) 636 )) 637 638add_gpus([ 639 GPUId(650), 640 ], A6xxGPUInfo( 641 CHIP.A6XX, 642 [a6xx_base, a6xx_gen3], 643 num_ccu = 3, 644 tile_align_w = 96, 645 tile_align_h = 16, 646 num_vsc_pipes = 32, 647 cs_shared_mem_size = 32 * 1024, 648 wave_granularity = 2, 649 fibers_per_sp = 128 * 2 * 16, 650 magic_regs = dict( 651 PC_POWER_CNTL = 2, 652 # this seems to be a chicken bit that fixes cubic filtering: 653 TPL1_DBG_ECO_CNTL = 
0x01008000, 654 GRAS_DBG_ECO_CNTL = 0x0, 655 SP_CHICKEN_BITS = 0x00001400, 656 UCHE_CLIENT_PF = 0x00000004, 657 PC_MODE_CNTL = 0x1f, 658 SP_DBG_ECO_CNTL = 0x01000000, 659 RB_DBG_ECO_CNTL = 0x04100000, 660 RB_DBG_ECO_CNTL_blit = 0x04100000, 661 HLSQ_DBG_ECO_CNTL = 0x0, 662 RB_UNKNOWN_8E01 = 0x0, 663 VPC_DBG_ECO_CNTL = 0x02000000, 664 UCHE_UNKNOWN_0E12 = 0x00000001 665 ) 666 )) 667 668add_gpus([ 669 GPUId(chip_id=0x00be06030500, name="Adreno 8c Gen 3"), 670 GPUId(chip_id=0x007506030500, name="Adreno 7c+ Gen 3"), 671 GPUId(chip_id=0x006006030500, name="Adreno 7c+ Gen 3 Lite"), 672 GPUId(chip_id=0x00ac06030500, name="FD643"), # e.g. QCM6490, Fairphone 5 673 # fallback wildcard entry should be last: 674 GPUId(chip_id=0xffff06030500, name="Adreno 7c+ Gen 3"), 675 ], A6xxGPUInfo( 676 CHIP.A6XX, 677 [a6xx_base, a6xx_gen4], 678 num_ccu = 2, 679 tile_align_w = 32, 680 tile_align_h = 16, 681 num_vsc_pipes = 32, 682 cs_shared_mem_size = 32 * 1024, 683 wave_granularity = 2, 684 fibers_per_sp = 128 * 2 * 16, 685 magic_regs = dict( 686 PC_POWER_CNTL = 1, 687 TPL1_DBG_ECO_CNTL = 0x05008000, 688 GRAS_DBG_ECO_CNTL = 0x0, 689 SP_CHICKEN_BITS = 0x00001400, 690 UCHE_CLIENT_PF = 0x00000084, 691 PC_MODE_CNTL = 0x1f, 692 SP_DBG_ECO_CNTL = 0x00000006, 693 RB_DBG_ECO_CNTL = 0x04100000, 694 RB_DBG_ECO_CNTL_blit = 0x04100000, 695 HLSQ_DBG_ECO_CNTL = 0x0, 696 RB_UNKNOWN_8E01 = 0x0, 697 VPC_DBG_ECO_CNTL = 0x02000000, 698 UCHE_UNKNOWN_0E12 = 0x00000001 699 ) 700 )) 701 702add_gpus([ 703 GPUId(660), 704 ], A6xxGPUInfo( 705 CHIP.A6XX, 706 [a6xx_base, a6xx_gen4], 707 num_ccu = 3, 708 tile_align_w = 96, 709 tile_align_h = 16, 710 num_vsc_pipes = 32, 711 cs_shared_mem_size = 32 * 1024, 712 wave_granularity = 2, 713 fibers_per_sp = 128 * 2 * 16, 714 magic_regs = dict( 715 PC_POWER_CNTL = 2, 716 TPL1_DBG_ECO_CNTL = 0x05008000, 717 GRAS_DBG_ECO_CNTL = 0x0, 718 SP_CHICKEN_BITS = 0x00001400, 719 UCHE_CLIENT_PF = 0x00000084, 720 PC_MODE_CNTL = 0x1f, 721 SP_DBG_ECO_CNTL = 0x01000000, 722 RB_DBG_ECO_CNTL = 
0x04100000, 723 RB_DBG_ECO_CNTL_blit = 0x04100000, 724 HLSQ_DBG_ECO_CNTL = 0x0, 725 RB_UNKNOWN_8E01 = 0x0, 726 VPC_DBG_ECO_CNTL = 0x02000000, 727 UCHE_UNKNOWN_0E12 = 0x00000001 728 ) 729 )) 730 731add_gpus([ 732 GPUId(chip_id=0x6060201, name="FD644"), 733 ], A6xxGPUInfo( 734 CHIP.A6XX, 735 [a6xx_base, a6xx_gen4], 736 num_ccu = 3, 737 tile_align_w = 96, 738 tile_align_h = 16, 739 num_vsc_pipes = 32, 740 cs_shared_mem_size = 32 * 1024, 741 wave_granularity = 2, 742 fibers_per_sp = 128 * 4 * 16, 743 magic_regs = dict( 744 PC_POWER_CNTL = 2, 745 TPL1_DBG_ECO_CNTL = 0x05008000, 746 GRAS_DBG_ECO_CNTL = 0x0, 747 SP_CHICKEN_BITS = 0x00001400, 748 UCHE_CLIENT_PF = 0x00000084, 749 PC_MODE_CNTL = 0x1f, 750 SP_DBG_ECO_CNTL = 0x6, 751 RB_DBG_ECO_CNTL = 0x04100000, 752 RB_DBG_ECO_CNTL_blit = 0x04100000, 753 HLSQ_DBG_ECO_CNTL = 0x0, 754 RB_UNKNOWN_8E01 = 0x0, 755 VPC_DBG_ECO_CNTL = 0x02000000, 756 UCHE_UNKNOWN_0E12 = 0x00000001 757 ) 758 )) 759 760add_gpus([ 761 GPUId(690), 762 GPUId(chip_id=0xffff06090000, name="FD690"), # Default no-speedbin fallback 763 ], A6xxGPUInfo( 764 CHIP.A6XX, 765 [a6xx_base, a6xx_gen4, a6xx_a690_quirk], 766 num_ccu = 8, 767 tile_align_w = 64, 768 tile_align_h = 32, 769 num_vsc_pipes = 32, 770 cs_shared_mem_size = 32 * 1024, 771 wave_granularity = 2, 772 fibers_per_sp = 128 * 2 * 16, 773 magic_regs = dict( 774 PC_POWER_CNTL = 7, 775 TPL1_DBG_ECO_CNTL = 0x04c00000, 776 GRAS_DBG_ECO_CNTL = 0x0, 777 SP_CHICKEN_BITS = 0x00001400, 778 UCHE_CLIENT_PF = 0x00000084, 779 PC_MODE_CNTL = 0x1f, 780 SP_DBG_ECO_CNTL = 0x1200000, 781 RB_DBG_ECO_CNTL = 0x100000, 782 RB_DBG_ECO_CNTL_blit = 0x00100000, # ??? 
783 HLSQ_DBG_ECO_CNTL = 0x0, 784 RB_UNKNOWN_8E01 = 0x0, 785 VPC_DBG_ECO_CNTL = 0x2000400, 786 UCHE_UNKNOWN_0E12 = 0x00000001 787 ), 788 raw_magic_regs = [ 789 [A6XXRegs.REG_A6XX_SP_UNKNOWN_AAF2, 0x00c00000], 790 ], 791 )) 792 793# Based on a6xx_base + a6xx_gen4 794a7xx_base = A6XXProps( 795 has_gmem_fast_clear = True, 796 has_hw_multiview = True, 797 has_fs_tex_prefetch = True, 798 has_sampler_minmax = True, 799 800 supports_double_threadsize = True, 801 802 sysmem_per_ccu_depth_cache_size = 256 * 1024, 803 sysmem_per_ccu_color_cache_size = 64 * 1024, 804 gmem_ccu_color_cache_fraction = CCUColorCacheFraction.EIGHTH.value, 805 806 prim_alloc_threshold = 0x7, 807 vs_max_inputs_count = 32, 808 max_sets = 8, 809 810 reg_size_vec4 = 96, 811 # Blob limits it to 128 but we hang with 128 812 instr_cache_size = 127, 813 supports_multiview_mask = True, 814 has_z24uint_s8uint = True, 815 tess_use_shared = True, 816 storage_16bit = True, 817 has_tex_filter_cubic = True, 818 has_separate_chroma_filter = True, 819 has_sample_locations = True, 820 has_lpac = True, 821 has_shading_rate = True, 822 has_getfiberid = True, 823 has_dp2acc = True, 824 has_dp4acc = True, 825 enable_lrz_fast_clear = True, 826 has_lrz_dir_tracking = True, 827 has_lrz_feedback = True, 828 has_per_view_viewport = True, 829 line_width_min = 1.0, 830 line_width_max = 127.5, 831 has_scalar_alu = True, 832 has_coherent_ubwc_flag_caches = True, 833 has_isam_v = True, 834 has_ssbo_imm_offsets = True, 835 has_early_preamble = True, 836 ) 837 838a7xx_725 = A7XXProps( 839 cmdbuf_start_a725_quirk = True, 840 supports_ibo_ubwc = True, 841 fs_must_have_non_zero_constlen_quirk = True, 842 enable_tp_ubwc_flag_hint = True, 843 ) 844 845a7xx_730 = A7XXProps( 846 supports_ibo_ubwc = True, 847 fs_must_have_non_zero_constlen_quirk = True, 848 enable_tp_ubwc_flag_hint = True, 849 ) 850 851a7xx_735 = A7XXProps( 852 stsc_duplication_quirk = True, 853 has_event_write_sample_count = True, 854 ubwc_unorm_snorm_int_compatible = 
True, 855 supports_ibo_ubwc = True, 856 fs_must_have_non_zero_constlen_quirk = True, 857 enable_tp_ubwc_flag_hint = True, 858 ) 859 860a7xx_740 = A7XXProps( 861 stsc_duplication_quirk = True, 862 has_event_write_sample_count = True, 863 ubwc_unorm_snorm_int_compatible = True, 864 supports_ibo_ubwc = True, 865 fs_must_have_non_zero_constlen_quirk = True, 866 # Most devices with a740 have blob v6xx which doesn't have 867 # this hint set. Match them for better compatibility by default. 868 enable_tp_ubwc_flag_hint = False, 869 ) 870 871a7xx_740_a32 = A7XXProps( 872 cmdbuf_start_a725_quirk = True, 873 stsc_duplication_quirk = True, 874 has_event_write_sample_count = True, 875 ubwc_unorm_snorm_int_compatible = True, 876 supports_ibo_ubwc = True, 877 fs_must_have_non_zero_constlen_quirk = True, 878 enable_tp_ubwc_flag_hint = False, 879 ) 880 881a7xx_750 = A7XXProps( 882 has_event_write_sample_count = True, 883 load_inline_uniforms_via_preamble_ldgk = True, 884 load_shader_consts_via_preamble = True, 885 has_gmem_vpc_attr_buf = True, 886 sysmem_vpc_attr_buf_size = 0x20000, 887 gmem_vpc_attr_buf_size = 0xc000, 888 ubwc_unorm_snorm_int_compatible = True, 889 supports_ibo_ubwc = True, 890 has_generic_clear = True, 891 gs_vpc_adjacency_quirk = True, 892 storage_8bit = True, 893 ubwc_all_formats_compatible = True, 894 has_compliant_dp4acc = True, 895 ubwc_coherency_quirk = True, 896 ) 897 898a730_magic_regs = dict( 899 TPL1_DBG_ECO_CNTL = 0x1000000, 900 GRAS_DBG_ECO_CNTL = 0x800, 901 SP_CHICKEN_BITS = 0x1440, 902 UCHE_CLIENT_PF = 0x00000084, 903 PC_MODE_CNTL = 0x0000003f, # 0x00001f1f in some tests 904 SP_DBG_ECO_CNTL = 0x10000000, 905 RB_DBG_ECO_CNTL = 0x00000000, 906 RB_DBG_ECO_CNTL_blit = 0x00000000, # is it even needed? 
907 RB_UNKNOWN_8E01 = 0x0, 908 VPC_DBG_ECO_CNTL = 0x02000000, 909 UCHE_UNKNOWN_0E12 = 0x3200000, 910 911 RB_UNKNOWN_8E06 = 0x02080000, 912 ) 913 914a730_raw_magic_regs = [ 915 [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00840004], 916 [A6XXRegs.REG_A6XX_TPL1_DBG_ECO_CNTL1, 0x00040724], 917 918 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00002400], 919 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00000000], 920 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00000000], 921 [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000], 922 [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000040], 923 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00008000], 924 [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x20080000], 925 [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x21fc7f00], 926 [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00000000], 927 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000], 928 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000], 929 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080], 930 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000], 931 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000], 932 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000], 933 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000], 934 [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000], 935 936 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840], 937 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62], 938 939 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000], 940 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000], 941 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000], 942 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4+1, 0x00000000], 943 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6, 0x00000000], 944 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000], 945 946 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000], 947 948 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000], 949 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000], 950 [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000], 951 952 # Shading rate group 953 [A6XXRegs.REG_A6XX_RB_UNKNOWN_88F4, 0x00000000], 954 [A6XXRegs.REG_A7XX_HLSQ_UNKNOWN_A9AD, 0x00000000], 955 
[A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80F4, 0x00000000], 956 ] 957 958add_gpus([ 959 # These are named as Adreno730v3 or Adreno725v1. 960 GPUId(chip_id=0x07030002, name="FD725"), 961 GPUId(chip_id=0xffff07030002, name="FD725"), 962 ], A6xxGPUInfo( 963 CHIP.A7XX, 964 [a7xx_base, a7xx_725], 965 num_ccu = 4, 966 tile_align_w = 64, 967 tile_align_h = 32, 968 num_vsc_pipes = 32, 969 cs_shared_mem_size = 32 * 1024, 970 wave_granularity = 2, 971 fibers_per_sp = 128 * 2 * 16, 972 magic_regs = a730_magic_regs, 973 raw_magic_regs = a730_raw_magic_regs, 974 )) 975 976add_gpus([ 977 GPUId(chip_id=0x07030001, name="FD730"), # KGSL, no speedbin data 978 GPUId(chip_id=0xffff07030001, name="FD730"), # Default no-speedbin fallback 979 ], A6xxGPUInfo( 980 CHIP.A7XX, 981 [a7xx_base, a7xx_730], 982 num_ccu = 4, 983 tile_align_w = 64, 984 tile_align_h = 32, 985 num_vsc_pipes = 32, 986 cs_shared_mem_size = 32 * 1024, 987 wave_granularity = 2, 988 fibers_per_sp = 128 * 2 * 16, 989 magic_regs = a730_magic_regs, 990 raw_magic_regs = a730_raw_magic_regs, 991 )) 992 993add_gpus([ 994 GPUId(chip_id=0x43030B00, name="FD735") 995 ], A6xxGPUInfo( 996 CHIP.A7XX, 997 [a7xx_base, a7xx_735], 998 num_ccu = 3, 999 tile_align_w = 96, 1000 tile_align_h = 32, 1001 num_vsc_pipes = 32, 1002 cs_shared_mem_size = 32 * 1024, 1003 wave_granularity = 2, 1004 fibers_per_sp = 128 * 2 * 16, 1005 magic_regs = dict( 1006 TPL1_DBG_ECO_CNTL = 0x11100000, 1007 GRAS_DBG_ECO_CNTL = 0x00004800, 1008 SP_CHICKEN_BITS = 0x10001400, 1009 UCHE_CLIENT_PF = 0x00000084, 1010 PC_MODE_CNTL = 0x0000001f, 1011 SP_DBG_ECO_CNTL = 0x10000000, 1012 RB_DBG_ECO_CNTL = 0x00000001, 1013 RB_DBG_ECO_CNTL_blit = 0x00000001, # is it even needed? 
1014 RB_UNKNOWN_8E01 = 0x0, 1015 VPC_DBG_ECO_CNTL = 0x02000000, 1016 UCHE_UNKNOWN_0E12 = 0x00000000, 1017 1018 RB_UNKNOWN_8E06 = 0x02080000, 1019 ), 1020 raw_magic_regs = [ 1021 [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00000000], 1022 [A6XXRegs.REG_A6XX_TPL1_DBG_ECO_CNTL1, 0x00040724], 1023 1024 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00000400], 1025 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00430800], 1026 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00000000], 1027 [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000], 1028 [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000000], 1029 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00000000], 1030 [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x00100000], 1031 [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x01585600], 1032 [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00008000], 1033 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000], 1034 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000], 1035 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080], 1036 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000], 1037 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000], 1038 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000], 1039 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000], 1040 [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000], 1041 1042 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840], 1043 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62], 1044 1045 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8009, 0x00000000], 1046 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800A, 0x00000000], 1047 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000], 1048 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 0x00000000], 1049 1050 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000], 1051 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000], 1052 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000], 1053 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4+1, 0x00000000], 1054 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6, 0x00000000], 1055 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000], 1056 1057 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000], 1058 1059 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 
0x00000000], 1060 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000], 1061 [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000], 1062 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000], 1063 1064 # Shading rate group 1065 [A6XXRegs.REG_A6XX_RB_UNKNOWN_88F4, 0x00000000], 1066 [A6XXRegs.REG_A7XX_HLSQ_UNKNOWN_A9AD, 0x00000000], 1067 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8008, 0x00000000], 1068 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80F4, 0x00000000], 1069 ], 1070 )) 1071 1072add_gpus([ 1073 GPUId(740), # Deprecated, used for dev kernels. 1074 GPUId(chip_id=0x43050a01, name="FD740"), # KGSL, no speedbin data 1075 GPUId(chip_id=0xffff43050a01, name="FD740"), # Default no-speedbin fallback 1076 GPUId(chip_id=0x43050B00, name="FD740"), # Quest 3 1077 GPUId(chip_id=0xffff43050B00, name="FD740"), 1078 GPUId(chip_id=0xffff43050c01, name="Adreno X1-85"), 1079 ], A6xxGPUInfo( 1080 CHIP.A7XX, 1081 [a7xx_base, a7xx_740], 1082 num_ccu = 6, 1083 tile_align_w = 96, 1084 tile_align_h = 32, 1085 num_vsc_pipes = 32, 1086 cs_shared_mem_size = 32 * 1024, 1087 wave_granularity = 2, 1088 fibers_per_sp = 128 * 2 * 16, 1089 magic_regs = dict( 1090 # PC_POWER_CNTL = 7, 1091 TPL1_DBG_ECO_CNTL = 0x11100000, 1092 GRAS_DBG_ECO_CNTL = 0x00004800, 1093 SP_CHICKEN_BITS = 0x10001400, 1094 UCHE_CLIENT_PF = 0x00000084, 1095 # Blob uses 0x1f or 0x1f1f, however these values cause vertices 1096 # corruption in some tests. 1097 PC_MODE_CNTL = 0x0000003f, 1098 SP_DBG_ECO_CNTL = 0x10000000, 1099 RB_DBG_ECO_CNTL = 0x00000000, 1100 RB_DBG_ECO_CNTL_blit = 0x00000000, # is it even needed? 
1101 # HLSQ_DBG_ECO_CNTL = 0x0, 1102 RB_UNKNOWN_8E01 = 0x0, 1103 VPC_DBG_ECO_CNTL = 0x02000000, 1104 UCHE_UNKNOWN_0E12 = 0x00000000, 1105 1106 RB_UNKNOWN_8E06 = 0x02080000, 1107 ), 1108 raw_magic_regs = [ 1109 [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00040004], 1110 [A6XXRegs.REG_A6XX_TPL1_DBG_ECO_CNTL1, 0x00040724], 1111 1112 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00000400], 1113 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00430800], 1114 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00000000], 1115 [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000], 1116 [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000000], 1117 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00000000], 1118 [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x00100000], 1119 [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x21585600], 1120 [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00008000], 1121 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000], 1122 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000], 1123 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080], 1124 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000], 1125 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000], 1126 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000], 1127 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000], 1128 [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000], 1129 1130 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840], 1131 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62], 1132 1133 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8009, 0x00000000], 1134 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800A, 0x00000000], 1135 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000], 1136 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 0x00000000], 1137 1138 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000], 1139 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000], 1140 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000], 1141 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4+1, 0x00000000], 1142 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6, 0x00000000], 1143 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000], 1144 1145 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000], 1146 1147 
[A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000], 1148 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000], 1149 [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000], 1150 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000], 1151 1152 # Shading rate group 1153 [A6XXRegs.REG_A6XX_RB_UNKNOWN_88F4, 0x00000000], 1154 [A6XXRegs.REG_A7XX_HLSQ_UNKNOWN_A9AD, 0x00000000], 1155 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8008, 0x00000000], 1156 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80F4, 0x00000000], 1157 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80F5, 0x00000000], 1158 ], 1159 )) 1160 1161# Values from blob v676.0 1162add_gpus([ 1163 GPUId(chip_id=0x43050a00, name="FDA32"), # Adreno A32 (G3x Gen 2) 1164 GPUId(chip_id=0xffff43050a00, name="FDA32"), 1165 ], A6xxGPUInfo( 1166 CHIP.A7XX, 1167 [a7xx_base, a7xx_740_a32], 1168 num_ccu = 6, 1169 tile_align_w = 96, 1170 tile_align_h = 32, 1171 num_vsc_pipes = 32, 1172 cs_shared_mem_size = 32 * 1024, 1173 wave_granularity = 2, 1174 fibers_per_sp = 128 * 2 * 16, 1175 magic_regs = dict( 1176 # PC_POWER_CNTL = 7, 1177 TPL1_DBG_ECO_CNTL = 0x11100000, 1178 GRAS_DBG_ECO_CNTL = 0x00004800, 1179 SP_CHICKEN_BITS = 0x10001400, 1180 UCHE_CLIENT_PF = 0x00000084, 1181 # Blob uses 0x1f or 0x1f1f, however these values cause vertices 1182 # corruption in some tests. 1183 PC_MODE_CNTL = 0x0000003f, 1184 SP_DBG_ECO_CNTL = 0x10000000, 1185 RB_DBG_ECO_CNTL = 0x00000000, 1186 RB_DBG_ECO_CNTL_blit = 0x00000000, # is it even needed? 
1187 # HLSQ_DBG_ECO_CNTL = 0x0, 1188 RB_UNKNOWN_8E01 = 0x00000000, 1189 VPC_DBG_ECO_CNTL = 0x02000000, 1190 UCHE_UNKNOWN_0E12 = 0x00000000, 1191 1192 RB_UNKNOWN_8E06 = 0x02080000, 1193 ), 1194 raw_magic_regs = [ 1195 [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00040004], 1196 [A6XXRegs.REG_A6XX_TPL1_DBG_ECO_CNTL1, 0x00000700], 1197 1198 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00000400], 1199 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00430820], 1200 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00000000], 1201 [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000], 1202 [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000080], 1203 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00000000], 1204 [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x00100000], 1205 [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x21585600], 1206 [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00008000], 1207 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000], 1208 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000], 1209 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080], 1210 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000], 1211 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000], 1212 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000], 1213 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000], 1214 [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000], 1215 1216 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840], 1217 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62], 1218 1219 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8009, 0x00000000], 1220 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800A, 0x00000000], 1221 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000], 1222 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 0x00000000], 1223 1224 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000], 1225 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000], 1226 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000], 1227 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4+1, 0x00000000], 1228 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6, 0x00000000], 1229 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000], 1230 1231 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000], 1232 
1233 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000], 1234 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000], 1235 [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000], 1236 1237 # Shading rate group 1238 [A6XXRegs.REG_A6XX_RB_UNKNOWN_88F4, 0x00000000], 1239 [A6XXRegs.REG_A7XX_HLSQ_UNKNOWN_A9AD, 0x00000000], 1240 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80F4, 0x00000000], 1241 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80F5, 0x00000000], 1242 ], 1243 )) 1244 1245add_gpus([ 1246 GPUId(chip_id=0x43051401, name="FD750"), # KGSL, no speedbin data 1247 GPUId(chip_id=0xffff43051401, name="FD750"), # Default no-speedbin fallback 1248 ], A6xxGPUInfo( 1249 CHIP.A7XX, 1250 [a7xx_base, a7xx_750], 1251 num_ccu = 6, 1252 tile_align_w = 96, 1253 tile_align_h = 32, 1254 num_vsc_pipes = 32, 1255 cs_shared_mem_size = 32 * 1024, 1256 wave_granularity = 2, 1257 fibers_per_sp = 128 * 2 * 16, 1258 magic_regs = dict( 1259 TPL1_DBG_ECO_CNTL = 0x11100000, 1260 GRAS_DBG_ECO_CNTL = 0x00004800, 1261 SP_CHICKEN_BITS = 0x10000400, 1262 PC_MODE_CNTL = 0x00003f1f, 1263 SP_DBG_ECO_CNTL = 0x10000000, 1264 RB_DBG_ECO_CNTL = 0x00000001, 1265 RB_DBG_ECO_CNTL_blit = 0x00000001, 1266 RB_UNKNOWN_8E01 = 0x0, 1267 VPC_DBG_ECO_CNTL = 0x02000000, 1268 UCHE_UNKNOWN_0E12 = 0x40000000, 1269 1270 RB_UNKNOWN_8E06 = 0x02082000, 1271 ), 1272 raw_magic_regs = [ 1273 [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00000000], 1274 [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000], 1275 [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000080], 1276 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00000000], 1277 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00431800], 1278 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00800000], 1279 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00000000], 1280 [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x00100000], 1281 [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x01585600], 1282 [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00008000], 1283 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000], 1284 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000], 1285 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 
0x00000080], 1286 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000], 1287 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000], 1288 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000], 1289 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000], 1290 [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000], 1291 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840], 1292 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62], 1293 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8009, 0x00000000], 1294 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800A, 0x00000000], 1295 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000], 1296 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 0x00000000], 1297 1298 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000], 1299 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000], 1300 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000], 1301 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4+1, 0x00000000], 1302 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6, 0x00000000], 1303 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000], 1304 1305 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000], 1306 1307 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000], 1308 [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000], 1309 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000], 1310 1311 # Shading rate group 1312 [A6XXRegs.REG_A6XX_RB_UNKNOWN_88F4, 0x00000000], 1313 [A6XXRegs.REG_A7XX_HLSQ_UNKNOWN_A9AD, 0x00000000], 1314 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8008, 0x00000000], 1315 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80F4, 0x00000000], 1316 1317 [0x930a, 0], 1318 [0x960a, 1], 1319 [A6XXRegs.REG_A7XX_SP_PS_ALIASED_COMPONENTS_CONTROL, 0], 1320 [A6XXRegs.REG_A7XX_SP_PS_ALIASED_COMPONENTS, 0], 1321 ], 1322 )) 1323 1324template = """\ 1325/* Copyright © 2021 Google, Inc. 
1326 * 1327 * SPDX-License-Identifier: MIT 1328 */ 1329 1330#include "freedreno_dev_info.h" 1331#include "util/u_debug.h" 1332#include "util/log.h" 1333 1334#include <stdlib.h> 1335 1336/* Map python to C: */ 1337#define True true 1338#define False false 1339 1340%for info in s.gpu_infos: 1341static const struct fd_dev_info __info${s.info_index(info)} = ${str(info)}; 1342%endfor 1343 1344static const struct fd_dev_rec fd_dev_recs[] = { 1345%for id, info in s.gpus.items(): 1346 { {${id.gpu_id}, ${hex(id.chip_id)}}, "${id.name}", &__info${s.info_index(info)} }, 1347%endfor 1348}; 1349 1350void 1351fd_dev_info_apply_dbg_options(struct fd_dev_info *info) 1352{ 1353 const char *env = debug_get_option("FD_DEV_FEATURES", NULL); 1354 if (!env || !*env) 1355 return; 1356 1357 char *features = strdup(env); 1358 char *feature, *feature_end; 1359 feature = strtok_r(features, ":", &feature_end); 1360 while (feature != NULL) { 1361 char *name, *name_end; 1362 name = strtok_r(feature, "=", &name_end); 1363 1364 if (!name) { 1365 mesa_loge("Invalid feature \\"%s\\" in FD_DEV_FEATURES", feature); 1366 exit(1); 1367 } 1368 1369 char *value = strtok_r(NULL, "=", &name_end); 1370 1371 feature = strtok_r(NULL, ":", &feature_end); 1372 1373%for (prop, gen), val in unique_props.items(): 1374 <% 1375 if isinstance(val, bool): 1376 parse_value = "debug_parse_bool_option" 1377 else: 1378 parse_value = "debug_parse_num_option" 1379 %> 1380 if (strcmp(name, "${prop}") == 0) { 1381 info->${gen}.${prop} = ${parse_value}(value, info->${gen}.${prop}); 1382 continue; 1383 } 1384%endfor 1385 1386 mesa_loge("Invalid feature \\"%s\\" in FD_DEV_FEATURES", name); 1387 exit(1); 1388 } 1389 1390 free(features); 1391} 1392""" 1393 1394print(Template(template).render(s=s, unique_props=A6XXProps.unique_props)) 1395 1396