1 /*
2 * Copyright © 2023 Imagination Technologies Ltd.
3 *
4 * based in part on anv driver which is:
5 * Copyright © 2015 Intel Corporation
6 *
7 * based in part on radv driver which is:
8 * Copyright © 2016 Red Hat.
9 * Copyright © 2016 Bas Nieuwenhuizen
10 *
11 * Permission is hereby granted, free of charge, to any person obtaining a copy
12 * of this software and associated documentation files (the "Software"), to deal
13 * in the Software without restriction, including without limitation the rights
14 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15 * copies of the Software, and to permit persons to whom the Software is
16 * furnished to do so, subject to the following conditions:
17 *
18 * The above copyright notice and this permission notice (including the next
19 * paragraph) shall be included in all copies or substantial portions of the
20 * Software.
21 *
22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 * SOFTWARE.
29 */
30
31 #ifndef PVR_COMMON_H
32 #define PVR_COMMON_H
33
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <vulkan/vulkan.h>
37
38 /* FIXME: Rename this, and ensure it only contains what's
39 * relevant for the driver/compiler interface (no Vulkan types).
40 */
41
42 #include "hwdef/rogue_hw_defs.h"
43 #include "pvr_limits.h"
44 #include "pvr_types.h"
45 #include "util/list.h"
46 #include "util/macros.h"
47 #include "vk_object.h"
48 #include "vk_sampler.h"
49 #include "vk_sync.h"
50
/* Vendor ID value used for Imagination Technologies. */
#define VK_VENDOR_ID_IMAGINATION 0x1010

#define PVR_WORKGROUP_DIMENSIONS 3U

/* Descriptor sizes.
 * NOTE(review): the unit looks like dwords (the sampler size dimensions a
 * uint32_t array and the image size is fed to PVR_DW_TO_BYTES()) - confirm.
 */
#define PVR_SAMPLER_DESCRIPTOR_SIZE 4U
#define PVR_IMAGE_DESCRIPTOR_SIZE 4U

#define PVR_STATE_PBE_DWORDS 2U

/* One past VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, i.e. the length needed for
 * an array indexed by VkDescriptorType up to and including that type.
 *
 * The whole expansion is parenthesized so that the cast cannot be split from
 * its operand by the surrounding expression (e.g. when used with sizeof).
 */
#define PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT \
   ((uint32_t)(VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT + 1U))

#define PVR_TRANSFER_MAX_LAYERS 1U
#define PVR_TRANSFER_MAX_LOADS 4U
/* Product of the maximum layer and load counts above. */
#define PVR_TRANSFER_MAX_IMAGES \
   (PVR_TRANSFER_MAX_LAYERS * PVR_TRANSFER_MAX_LOADS)
67
/* TODO: move into a common surface library? */
/* Memory layout identifiers. Zero is reserved for "undefined" so that
 * zero-initialized storage never names a real layout.
 */
enum pvr_memlayout {
   PVR_MEMLAYOUT_UNDEFINED = 0, /* explicitly treat 0 as undefined */
   PVR_MEMLAYOUT_LINEAR = 1,
   PVR_MEMLAYOUT_TWIDDLED = 2,
   PVR_MEMLAYOUT_3DTWIDDLED = 3,
};
75
/* Texture state kinds. PVR_TEXTURE_STATE_MAX_ENUM is the number of real
 * entries and is not itself a valid state.
 */
enum pvr_texture_state {
   PVR_TEXTURE_STATE_SAMPLE = 0,
   PVR_TEXTURE_STATE_STORAGE = 1,
   PVR_TEXTURE_STATE_ATTACHMENT = 2,
   PVR_TEXTURE_STATE_MAX_ENUM = 3,
};
82
/* Sub-command kinds. Zero is reserved for "invalid". */
enum pvr_sub_cmd_type {
   PVR_SUB_CMD_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */
   PVR_SUB_CMD_TYPE_GRAPHICS = 1,
   PVR_SUB_CMD_TYPE_COMPUTE = 2,
   PVR_SUB_CMD_TYPE_TRANSFER = 3,
   PVR_SUB_CMD_TYPE_OCCLUSION_QUERY = 4,
   PVR_SUB_CMD_TYPE_EVENT = 5,
};
91
/* Kinds of event operation. */
enum pvr_event_type {
   PVR_EVENT_TYPE_SET = 0,
   PVR_EVENT_TYPE_RESET = 1,
   PVR_EVENT_TYPE_WAIT = 2,
   PVR_EVENT_TYPE_BARRIER = 3,
};
98
/* Depth/stencil usage. Zero is reserved for "undefined". */
enum pvr_depth_stencil_usage {
   PVR_DEPTH_STENCIL_USAGE_UNDEFINED = 0, /* explicitly treat 0 as undefined */
   PVR_DEPTH_STENCIL_USAGE_NEEDED = 1,
   PVR_DEPTH_STENCIL_USAGE_NEVER = 2,
};
104
/* Job kinds. The values double as bit positions for
 * enum pvr_pipeline_stage_bits, so they must stay small and contiguous.
 * PVR_JOB_TYPE_MAX is the number of real job types.
 */
enum pvr_job_type {
   PVR_JOB_TYPE_GEOM = 0,
   PVR_JOB_TYPE_FRAG = 1,
   PVR_JOB_TYPE_COMPUTE = 2,
   PVR_JOB_TYPE_TRANSFER = 3,
   PVR_JOB_TYPE_OCCLUSION_QUERY = 4,
   PVR_JOB_TYPE_MAX = 5
};
113
/* Pipeline kinds. Zero is reserved for "invalid". */
enum pvr_pipeline_type {
   PVR_PIPELINE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */
   PVR_PIPELINE_TYPE_GRAPHICS = 1,
   PVR_PIPELINE_TYPE_COMPUTE = 2,
};
119
120 enum pvr_pipeline_stage_bits {
121 PVR_PIPELINE_STAGE_GEOM_BIT = BITFIELD_BIT(PVR_JOB_TYPE_GEOM),
122 PVR_PIPELINE_STAGE_FRAG_BIT = BITFIELD_BIT(PVR_JOB_TYPE_FRAG),
123 PVR_PIPELINE_STAGE_COMPUTE_BIT = BITFIELD_BIT(PVR_JOB_TYPE_COMPUTE),
124 PVR_PIPELINE_STAGE_TRANSFER_BIT = BITFIELD_BIT(PVR_JOB_TYPE_TRANSFER),
125 /* Note that this doesn't map to VkPipelineStageFlagBits so be careful with
126 * this.
127 */
128 PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT =
129 BITFIELD_BIT(PVR_JOB_TYPE_OCCLUSION_QUERY),
130 };
131
/* Both graphics stage bits (geometry and fragment). */
#define PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS \
   (PVR_PIPELINE_STAGE_GEOM_BIT | PVR_PIPELINE_STAGE_FRAG_BIT)

/* Graphics, compute and transfer stage bits. Note that
 * PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT is not part of this mask.
 */
#define PVR_PIPELINE_STAGE_ALL_BITS                                        \
   (PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS | PVR_PIPELINE_STAGE_COMPUTE_BIT | \
    PVR_PIPELINE_STAGE_TRANSFER_BIT)

/* NOTE(review): presumably the number of bits set in
 * PVR_PIPELINE_STAGE_ALL_BITS - confirm and keep the two in sync.
 */
#define PVR_NUM_SYNC_PIPELINE_STAGES 4U
140
/* Shader stage allocation indices.
 *
 * Warning: never introduce an "invalid" entry with value 0. Other code
 * relies on 0 being the first shader stage, which is what allows stages to
 * be split or added in the future; making 0 invalid will very likely cause
 * problems.
 *
 * PVR_STAGE_ALLOCATION_COUNT is the number of stages and is used to size
 * per-stage arrays.
 */
enum pvr_stage_allocation {
   PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY = 0,
   PVR_STAGE_ALLOCATION_FRAGMENT = 1,
   PVR_STAGE_ALLOCATION_COMPUTE = 2,
   PVR_STAGE_ALLOCATION_COUNT = 3
};
151
/* Filtering modes. */
enum pvr_filter {
   PVR_FILTER_DONTCARE = 0, /* Any filtering mode is acceptable. */
   PVR_FILTER_POINT = 1,
   PVR_FILTER_LINEAR = 2,
   PVR_FILTER_BICUBIC = 3,
};
158
/* Resolve operations: blend/min/max followed by one entry per sample index
 * (SAMPLE0..SAMPLE7, contiguous so that PVR_RESOLVE_SAMPLE0 + i names
 * sample i).
 */
enum pvr_resolve_op {
   PVR_RESOLVE_BLEND = 0,
   PVR_RESOLVE_MIN = 1,
   PVR_RESOLVE_MAX = 2,
   PVR_RESOLVE_SAMPLE0 = 3,
   PVR_RESOLVE_SAMPLE1 = 4,
   PVR_RESOLVE_SAMPLE2 = 5,
   PVR_RESOLVE_SAMPLE3 = 6,
   PVR_RESOLVE_SAMPLE4 = 7,
   PVR_RESOLVE_SAMPLE5 = 8,
   PVR_RESOLVE_SAMPLE6 = 9,
   PVR_RESOLVE_SAMPLE7 = 10,
};
172
/* Event state, recording both the value (set/reset) and which side (host or
 * device) produced it.
 */
enum pvr_event_state {
   PVR_EVENT_STATE_SET_BY_HOST = 0,
   PVR_EVENT_STATE_RESET_BY_HOST = 1,
   PVR_EVENT_STATE_SET_BY_DEVICE = 2,
   PVR_EVENT_STATE_RESET_BY_DEVICE = 3
};
179
/* Kinds of deferred control stream command. */
enum pvr_deferred_cs_command_type {
   PVR_DEFERRED_CS_COMMAND_TYPE_DBSC = 0,
   PVR_DEFERRED_CS_COMMAND_TYPE_DBSC2 = 1,
};
184
/* Kinds of query operation. */
enum pvr_query_type {
   PVR_QUERY_TYPE_AVAILABILITY_WRITE = 0,
   PVR_QUERY_TYPE_RESET_QUERY_POOL = 1,
   PVR_QUERY_TYPE_COPY_QUERY_RESULTS = 2,
};
190
191 union pvr_sampler_descriptor {
192 uint32_t words[PVR_SAMPLER_DESCRIPTOR_SIZE];
193
194 struct {
195 /* Packed PVRX(TEXSTATE_SAMPLER). */
196 uint64_t sampler_word;
197 uint32_t compare_op;
198 /* TODO: Figure out what this word is for and rename.
199 * Sampler state word 1?
200 */
201 uint32_t word3;
202 } data;
203 };
204
205 struct pvr_combined_image_sampler_descriptor {
206 /* | TEXSTATE_IMAGE_WORD0 | TEXSTATE_{STRIDE_,}IMAGE_WORD1 | */
207 uint64_t image[ROGUE_NUM_TEXSTATE_IMAGE_WORDS];
208 union pvr_sampler_descriptor sampler;
209 };
210
211 #define CHECK_STRUCT_FIELD_SIZE(_struct_type, _field_name, _size) \
212 static_assert(sizeof(((struct _struct_type *)NULL)->_field_name) == \
213 (_size), \
214 "Size of '" #_field_name "' in '" #_struct_type \
215 "' differs from expected")
216
217 CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
218 image,
219 ROGUE_NUM_TEXSTATE_IMAGE_WORDS * sizeof(uint64_t));
220 CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
221 image,
222 PVR_DW_TO_BYTES(PVR_IMAGE_DESCRIPTOR_SIZE));
223 #if 0
224 /* TODO: Don't really want to include pvr_csb.h in here since this header is
225 * shared with the compiler. Figure out a better place for these.
226 */
227 CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
228 image,
229 (pvr_cmd_length(TEXSTATE_IMAGE_WORD0) +
230 pvr_cmd_length(TEXSTATE_IMAGE_WORD1)) *
231 sizeof(uint32_t));
232 CHECK_STRUCT_FIELD_SIZE(pvr_combined_image_sampler_descriptor,
233 image,
234 (pvr_cmd_length(TEXSTATE_IMAGE_WORD0) +
235 pvr_cmd_length(TEXSTATE_STRIDE_IMAGE_WORD1)) *
236 sizeof(uint32_t));
237 #endif
238
239 #undef CHECK_STRUCT_FIELD_SIZE
240
241 struct pvr_sampler {
242 struct vk_sampler vk;
243
244 union pvr_sampler_descriptor descriptor;
245 };
246
/* Size requirements of a descriptor, split into a primary and a secondary
 * part.
 */
struct pvr_descriptor_size_info {
   /* Size that cannot be spilled; stored in the common store. */
   uint32_t primary;

   /* Size that may be spilled, to accommodate the common store's
    * limitation.
    */
   uint32_t secondary;

   uint32_t alignment;
};
256
257 struct pvr_descriptor_set_layout_binding {
258 VkDescriptorType type;
259
260 /* "M" in layout(set = N, binding = M)
261 * Can be used to index bindings in the descriptor_set_layout.
262 */
263 uint32_t binding_number;
264
265 uint32_t descriptor_count;
266
267 /* Index into the flattened descriptor set */
268 uint16_t descriptor_index;
269
270 /* Mask of enum pvr_stage_allocation. */
271 uint8_t shader_stage_mask;
272
273 struct {
274 uint32_t primary;
275 uint32_t secondary;
276 } per_stage_offset_in_dwords[PVR_STAGE_ALLOCATION_COUNT];
277
278 bool has_immutable_samplers;
279 /* Index at which the samplers can be found in the descriptor_set_layout.
280 * 0 when the samplers are at index 0 or no samplers are present.
281 */
282 uint32_t immutable_samplers_index;
283 };
284
/* Offsets and sizes of the primary and secondary descriptor regions, plus
 * the sizes of their dynamic parts. Every field is expressed in dwords.
 */
struct pvr_descriptor_set_layout_mem_layout {
   /* Primary region. */
   uint32_t primary_offset;
   uint32_t primary_size;

   /* Secondary region. */
   uint32_t secondary_offset;
   uint32_t secondary_size;

   /* Dynamic parts. */
   uint32_t primary_dynamic_size;
   uint32_t secondary_dynamic_size;
};
296
297 struct pvr_descriptor_set_layout {
298 struct vk_object_base base;
299
300 /* Total amount of descriptors contained in this set. */
301 uint32_t descriptor_count;
302
303 /* Count of dynamic buffers. */
304 uint32_t dynamic_buffer_count;
305 uint32_t total_dynamic_size_in_dwords;
306
307 uint32_t binding_count;
308 struct pvr_descriptor_set_layout_binding *bindings;
309
310 uint32_t immutable_sampler_count;
311 const struct pvr_sampler **immutable_samplers;
312
313 /* Shader stages requiring access to descriptors in this set. */
314 /* Mask of enum pvr_stage_allocation. */
315 uint8_t shader_stage_mask;
316
317 /* Count of each VkDescriptorType per shader stage. Dynamically allocated
318 * arrays per stage as to not hard code the max descriptor type here.
319 *
320 * Note: when adding a new type, it might not numerically follow the
321 * previous type so a sparse array will be created. You might want to
322 * readjust how these arrays are created and accessed.
323 */
324 uint32_t *per_stage_descriptor_count[PVR_STAGE_ALLOCATION_COUNT];
325
326 uint32_t total_size_in_dwords;
327 struct pvr_descriptor_set_layout_mem_layout
328 memory_layout_in_dwords_per_stage[PVR_STAGE_ALLOCATION_COUNT];
329 };
330
331 struct pvr_descriptor_pool {
332 struct vk_object_base base;
333
334 VkAllocationCallbacks alloc;
335
336 /* Saved information from pCreateInfo. */
337 uint32_t max_sets;
338
339 uint32_t total_size_in_dwords;
340 uint32_t current_size_in_dwords;
341
342 /* Derived and other state. */
343 /* List of the descriptor sets created using this pool. */
344 struct list_head descriptor_sets;
345 };
346
347 struct pvr_descriptor {
348 VkDescriptorType type;
349
350 union {
351 struct {
352 struct pvr_buffer_view *bview;
353 pvr_dev_addr_t buffer_dev_addr;
354 VkDeviceSize buffer_desc_range;
355 VkDeviceSize buffer_whole_range;
356 };
357
358 struct {
359 VkImageLayout layout;
360 const struct pvr_image_view *iview;
361 const struct pvr_sampler *sampler;
362 };
363 };
364 };
365
366 struct pvr_descriptor_set {
367 struct vk_object_base base;
368
369 const struct pvr_descriptor_set_layout *layout;
370 const struct pvr_descriptor_pool *pool;
371
372 struct pvr_suballoc_bo *pvr_bo;
373
374 /* Links this descriptor set into pvr_descriptor_pool::descriptor_sets list.
375 */
376 struct list_head link;
377
378 /* Array of size layout::descriptor_count. */
379 struct pvr_descriptor descriptors[0];
380 };
381
382 struct pvr_event {
383 struct vk_object_base base;
384
385 enum pvr_event_state state;
386 struct vk_sync *sync;
387 };
388
389 #define PVR_MAX_DYNAMIC_BUFFERS \
390 (PVR_MAX_DESCRIPTOR_SET_UNIFORM_DYNAMIC_BUFFERS + \
391 PVR_MAX_DESCRIPTOR_SET_STORAGE_DYNAMIC_BUFFERS)
392
393 struct pvr_descriptor_state {
394 struct pvr_descriptor_set *descriptor_sets[PVR_MAX_DESCRIPTOR_SETS];
395 uint32_t valid_mask;
396
397 uint32_t dynamic_offsets[PVR_MAX_DYNAMIC_BUFFERS];
398 };
399
400 #undef PVR_MAX_DYNAMIC_BUFFERS
401
/**
 * \brief Describes how the driver lays out its shared register allocations.
 *
 * Each resource has a 'present' flag, telling whether the resource was
 * allocated for and will therefore be found in the shareds, and an 'offset',
 * giving the shared reg at which the resource starts.
 */
struct pvr_sh_reg_layout {
   /* Device address of the descriptor set addrs table. Always occupies
    * 2 sh regs when present.
    */
   struct {
      bool present;
      uint32_t offset;
   } descriptor_set_addrs_table;

   /* Device address of the push constants buffer. Always occupies 2 sh regs
    * when present.
    */
   struct {
      bool present;
      uint32_t offset;
   } push_consts;

   /* Device address of the blend constants buffer. Always occupies 2 sh regs
    * when present.
    */
   struct {
      bool present;
      uint32_t offset;
   } blend_consts;
};
434
435 struct pvr_pipeline_layout {
436 struct vk_object_base base;
437
438 uint32_t set_count;
439 /* Contains set_count amount of descriptor set layouts. */
440 struct pvr_descriptor_set_layout *set_layout[PVR_MAX_DESCRIPTOR_SETS];
441
442 /* Mask of enum pvr_stage_allocation. */
443 uint8_t push_constants_shader_stages;
444 uint32_t vert_push_constants_offset;
445 uint32_t frag_push_constants_offset;
446 uint32_t compute_push_constants_offset;
447
448 /* Mask of enum pvr_stage_allocation. */
449 uint8_t shader_stage_mask;
450
451 /* Per stage masks indicating which set in the layout contains any
452 * descriptor of the appropriate types: VK..._{SAMPLER, SAMPLED_IMAGE,
453 * UNIFORM_TEXEL_BUFFER, UNIFORM_BUFFER, STORAGE_BUFFER}.
454 * Shift by the set's number to check the mask (1U << set_num).
455 */
456 uint32_t per_stage_descriptor_masks[PVR_STAGE_ALLOCATION_COUNT];
457
458 /* Array of descriptor offsets at which the set's descriptors' start, per
459 * stage, within all the sets in the pipeline layout per descriptor type.
460 * Note that we only store into for specific descriptor types
461 * VK_DESCRIPTOR_TYPE_{SAMPLER, SAMPLED_IMAGE, UNIFORM_TEXEL_BUFFER,
462 * UNIFORM_BUFFER, STORAGE_BUFFER}, the rest will be 0.
463 */
464 uint32_t
465 descriptor_offsets[PVR_MAX_DESCRIPTOR_SETS][PVR_STAGE_ALLOCATION_COUNT]
466 [PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT];
467
468 /* There is no accounting for dynamics in here. They will be garbage values.
469 */
470 struct pvr_descriptor_set_layout_mem_layout
471 register_layout_in_dwords_per_stage[PVR_STAGE_ALLOCATION_COUNT]
472 [PVR_MAX_DESCRIPTOR_SETS];
473
474 /* TODO: Consider whether this needs to be here. */
475 struct pvr_sh_reg_layout sh_reg_layout_per_stage[PVR_STAGE_ALLOCATION_COUNT];
476
477 /* All sizes in dwords. */
478 struct pvr_pipeline_layout_reg_info {
479 uint32_t primary_dynamic_size_in_dwords;
480 uint32_t secondary_dynamic_size_in_dwords;
481 } per_stage_reg_info[PVR_STAGE_ALLOCATION_COUNT];
482 };
483
pvr_compare_layout_binding(const void * a,const void * b)484 static int pvr_compare_layout_binding(const void *a, const void *b)
485 {
486 uint32_t binding_a;
487 uint32_t binding_b;
488
489 binding_a = ((struct pvr_descriptor_set_layout_binding *)a)->binding_number;
490 binding_b = ((struct pvr_descriptor_set_layout_binding *)b)->binding_number;
491
492 if (binding_a < binding_b)
493 return -1;
494
495 if (binding_a > binding_b)
496 return 1;
497
498 return 0;
499 }
500
501 /* This function does not assume that the binding will always exist for a
502 * particular binding_num. Caller should check before using the return pointer.
503 */
504 static struct pvr_descriptor_set_layout_binding *
pvr_get_descriptor_binding(const struct pvr_descriptor_set_layout * layout,const uint32_t binding_num)505 pvr_get_descriptor_binding(const struct pvr_descriptor_set_layout *layout,
506 const uint32_t binding_num)
507 {
508 struct pvr_descriptor_set_layout_binding binding;
509 binding.binding_number = binding_num;
510
511 return bsearch(&binding,
512 layout->bindings,
513 layout->binding_count,
514 sizeof(binding),
515 pvr_compare_layout_binding);
516 }
517
518 #endif /* PVR_COMMON_H */
519