1 /*
2 * Copyright © 2024 Valve Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "radv_printf.h"
8 #include "radv_device.h"
9 #include "radv_physical_device.h"
10
11 #include "util/hash_table.h"
12 #include "util/strndup.h"
13 #include "util/u_printf.h"
14
15 #include "nir.h"
16 #include "nir_builder.h"
17
/* Maps every nir_shader registered through radv_device_associate_nir() to the
 * radv_device it belongs to, so radv_build_printf() can find the printf buffer
 * from a nir_builder alone. Starts out NULL (static storage) and is created on
 * the first association.
 */
static struct hash_table *device_ht;
19
20 VkResult
radv_printf_data_init(struct radv_device * device)21 radv_printf_data_init(struct radv_device *device)
22 {
23 const struct radv_physical_device *pdev = radv_device_physical(device);
24
25 util_dynarray_init(&device->printf.formats, NULL);
26
27 device->printf.buffer_size = debug_get_num_option("RADV_PRINTF_BUFFER_SIZE", 0);
28 if (device->printf.buffer_size < sizeof(struct radv_printf_buffer_header))
29 return VK_SUCCESS;
30
31 VkBufferCreateInfo buffer_create_info = {
32 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
33 .pNext =
34 &(VkBufferUsageFlags2CreateInfoKHR){
35 .sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR,
36 .usage = VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT_KHR | VK_BUFFER_USAGE_2_SHADER_DEVICE_ADDRESS_BIT_KHR,
37 },
38 .size = device->printf.buffer_size,
39 };
40
41 VkDevice _device = radv_device_to_handle(device);
42 VkResult result = device->vk.dispatch_table.CreateBuffer(_device, &buffer_create_info, NULL, &device->printf.buffer);
43 if (result != VK_SUCCESS)
44 return result;
45
46 VkMemoryRequirements requirements;
47 device->vk.dispatch_table.GetBufferMemoryRequirements(_device, device->printf.buffer, &requirements);
48
49 VkMemoryAllocateInfo alloc_info = {
50 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
51 .allocationSize = requirements.size,
52 .memoryTypeIndex =
53 radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
54 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
55 };
56
57 result = device->vk.dispatch_table.AllocateMemory(_device, &alloc_info, NULL, &device->printf.memory);
58 if (result != VK_SUCCESS)
59 return result;
60
61 result = device->vk.dispatch_table.MapMemory(_device, device->printf.memory, 0, VK_WHOLE_SIZE, 0,
62 (void **)&device->printf.data);
63 if (result != VK_SUCCESS)
64 return result;
65
66 result = device->vk.dispatch_table.BindBufferMemory(_device, device->printf.buffer, device->printf.memory, 0);
67 if (result != VK_SUCCESS)
68 return result;
69
70 struct radv_printf_buffer_header *header = device->printf.data;
71 header->offset = sizeof(struct radv_printf_buffer_header);
72 header->size = device->printf.buffer_size;
73
74 VkBufferDeviceAddressInfo addr_info = {
75 .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
76 .buffer = device->printf.buffer,
77 };
78 device->printf.buffer_addr = device->vk.dispatch_table.GetBufferDeviceAddress(_device, &addr_info);
79
80 return VK_SUCCESS;
81 }
82
83 void
radv_printf_data_finish(struct radv_device * device)84 radv_printf_data_finish(struct radv_device *device)
85 {
86 VkDevice _device = radv_device_to_handle(device);
87
88 device->vk.dispatch_table.DestroyBuffer(_device, device->printf.buffer, NULL);
89 if (device->printf.memory)
90 device->vk.dispatch_table.UnmapMemory(_device, device->printf.memory);
91 device->vk.dispatch_table.FreeMemory(_device, device->printf.memory, NULL);
92
93 util_dynarray_foreach (&device->printf.formats, struct radv_printf_format, format)
94 free(format->string);
95
96 util_dynarray_fini(&device->printf.formats);
97 }
98
99 void
radv_build_printf(nir_builder * b,nir_def * cond,const char * format_string,...)100 radv_build_printf(nir_builder *b, nir_def *cond, const char *format_string, ...)
101 {
102 if (!device_ht)
103 return;
104
105 struct radv_device *device = _mesa_hash_table_search(device_ht, b->shader)->data;
106 if (!device->printf.buffer_addr)
107 return;
108
109 struct radv_printf_format format = {0};
110 format.string = strdup(format_string);
111 if (!format.string)
112 return;
113
114 uint32_t format_index = util_dynarray_num_elements(&device->printf.formats, struct radv_printf_format);
115
116 if (cond)
117 nir_push_if(b, cond);
118
119 nir_def *size = nir_imm_int(b, 4);
120
121 va_list arg_list;
122 va_start(arg_list, format_string);
123
124 uint32_t num_args = 0;
125 for (uint32_t i = 0; i < strlen(format_string); i++)
126 if (format_string[i] == '%')
127 num_args++;
128
129 nir_def **args = malloc(num_args * sizeof(nir_def *));
130 nir_def **strides = malloc(num_args * sizeof(nir_def *));
131
132 nir_def *ballot = nir_ballot(b, 1, 64, nir_imm_true(b));
133 nir_def *active_invocation_count = nir_bit_count(b, ballot);
134
135 for (uint32_t i = 0; i < num_args; i++) {
136 nir_def *arg = va_arg(arg_list, nir_def *);
137
138 if (arg->bit_size == 1)
139 arg = nir_b2i32(b, arg);
140
141 args[i] = arg;
142
143 uint32_t arg_size = arg->bit_size == 1 ? 32 : arg->bit_size / 8;
144 format.element_sizes[i] = arg_size;
145
146 nir_update_instr_divergence(b->shader, arg->parent_instr);
147
148 if (arg->divergent) {
149 strides[i] = nir_imul_imm(b, active_invocation_count, arg_size);
150 format.divergence_mask |= BITFIELD_BIT(i);
151 } else {
152 strides[i] = nir_imm_int(b, arg_size);
153 }
154
155 size = nir_iadd(b, size, strides[i]);
156 }
157
158 va_end(arg_list);
159
160 nir_def *offset;
161 nir_def *undef;
162
163 nir_push_if(b, nir_elect(b, 1));
164 {
165 offset = nir_global_atomic(
166 b, 32, nir_imm_int64(b, device->printf.buffer_addr + offsetof(struct radv_printf_buffer_header, offset)), size,
167 .atomic_op = nir_atomic_op_iadd);
168 }
169 nir_push_else(b, NULL);
170 {
171 undef = nir_undef(b, 1, 32);
172 }
173 nir_pop_if(b, NULL);
174
175 offset = nir_read_first_invocation(b, nir_if_phi(b, offset, undef));
176
177 nir_def *buffer_size = nir_load_global(
178 b, nir_imm_int64(b, device->printf.buffer_addr + offsetof(struct radv_printf_buffer_header, size)), 4, 1, 32);
179
180 nir_push_if(b, nir_ige(b, buffer_size, nir_iadd(b, offset, size)));
181 {
182 nir_def *addr = nir_iadd_imm(b, nir_u2u64(b, offset), device->printf.buffer_addr);
183
184 /* header */
185 nir_store_global(b, addr, 4, nir_ior_imm(b, active_invocation_count, format_index << 16), 1);
186 addr = nir_iadd_imm(b, addr, 4);
187
188 for (uint32_t i = 0; i < num_args; i++) {
189 nir_def *arg = args[i];
190
191 if (arg->divergent) {
192 nir_def *invocation_index = nir_mbcnt_amd(b, ballot, nir_imm_int(b, 0));
193 nir_store_global(
194 b, nir_iadd(b, addr, nir_u2u64(b, nir_imul_imm(b, invocation_index, format.element_sizes[i]))), 4, arg,
195 1);
196 } else {
197 nir_store_global(b, addr, 4, arg, 1);
198 }
199
200 addr = nir_iadd(b, addr, nir_u2u64(b, strides[i]));
201 }
202 }
203 nir_pop_if(b, NULL);
204
205 if (cond)
206 nir_pop_if(b, NULL);
207
208 free(args);
209 free(strides);
210
211 util_dynarray_append(&device->printf.formats, struct radv_printf_format, format);
212 }
213
214 void
radv_dump_printf_data(struct radv_device * device,FILE * out)215 radv_dump_printf_data(struct radv_device *device, FILE *out)
216 {
217 if (!device->printf.data)
218 return;
219
220 device->vk.dispatch_table.DeviceWaitIdle(radv_device_to_handle(device));
221
222 struct radv_printf_buffer_header *header = device->printf.data;
223 uint8_t *data = device->printf.data;
224
225 for (uint32_t offset = sizeof(struct radv_printf_buffer_header); offset < header->offset;) {
226 uint32_t printf_header = *(uint32_t *)&data[offset];
227 offset += sizeof(uint32_t);
228
229 uint32_t format_index = printf_header >> 16;
230 struct radv_printf_format *printf_format =
231 util_dynarray_element(&device->printf.formats, struct radv_printf_format, format_index);
232
233 uint32_t invocation_count = printf_header & 0xFFFF;
234
235 uint32_t num_args = 0;
236 for (uint32_t i = 0; i < strlen(printf_format->string); i++)
237 if (printf_format->string[i] == '%')
238 num_args++;
239
240 char *format = printf_format->string;
241
242 for (uint32_t i = 0; i <= num_args; i++) {
243 size_t spec_pos = util_printf_next_spec_pos(format, 0);
244
245 if (spec_pos == -1) {
246 fprintf(out, "%s", format);
247 continue;
248 }
249
250 const char *token = util_printf_prev_tok(&format[spec_pos]);
251 char *next_format = &format[spec_pos + 1];
252
253 /* print the part before the format token */
254 if (token != format)
255 fwrite(format, token - format, 1, out);
256
257 char *print_str = strndup(token, next_format - token);
258 /* rebase spec_pos so we can use it with print_str */
259 spec_pos += format - token;
260
261 size_t element_size = printf_format->element_sizes[i];
262 bool is_float = strpbrk(print_str, "fFeEgGaA") != NULL;
263
264 uint32_t lane_count = (printf_format->divergence_mask & BITFIELD_BIT(i)) ? invocation_count : 1;
265 for (uint32_t lane = 0; lane < lane_count; lane++) {
266 switch (element_size) {
267 case 1: {
268 uint8_t v;
269 memcpy(&v, &data[offset], element_size);
270 fprintf(out, print_str, v);
271 break;
272 }
273 case 2: {
274 uint16_t v;
275 memcpy(&v, &data[offset], element_size);
276 fprintf(out, print_str, v);
277 break;
278 }
279 case 4: {
280 if (is_float) {
281 float v;
282 memcpy(&v, &data[offset], element_size);
283 fprintf(out, print_str, v);
284 } else {
285 uint32_t v;
286 memcpy(&v, &data[offset], element_size);
287 fprintf(out, print_str, v);
288 }
289 break;
290 }
291 case 8: {
292 if (is_float) {
293 double v;
294 memcpy(&v, &data[offset], element_size);
295 fprintf(out, print_str, v);
296 } else {
297 uint64_t v;
298 memcpy(&v, &data[offset], element_size);
299 fprintf(out, print_str, v);
300 }
301 break;
302 }
303 default:
304 unreachable("Unsupported data type");
305 }
306
307 if (lane != lane_count - 1)
308 fprintf(out, " ");
309
310 offset += element_size;
311 }
312
313 /* rebase format */
314 format = next_format;
315 free(print_str);
316 }
317 }
318
319 fflush(out);
320
321 header->offset = sizeof(struct radv_printf_buffer_header);
322 }
323
324 void
radv_device_associate_nir(struct radv_device * device,nir_shader * nir)325 radv_device_associate_nir(struct radv_device *device, nir_shader *nir)
326 {
327 if (!device->printf.buffer_addr)
328 return;
329
330 if (!device_ht)
331 device_ht = _mesa_pointer_hash_table_create(NULL);
332
333 _mesa_hash_table_insert(device_ht, nir, device);
334 }
335