1 /**************************************************************************
2 *
3 * Copyright 2012 Marek Olšák <[email protected]>
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL THE AUTHORS AND/OR THEIR SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "util/format/format_utils.h"
29 #include "util/u_cpu_detect.h"
30 #include "util/u_helpers.h"
31 #include "util/u_inlines.h"
32 #include "util/u_upload_mgr.h"
33 #include "util/u_thread.h"
34 #include "util/os_time.h"
35 #include "util/perf/cpu_trace.h"
36 #include <inttypes.h>
37
38 /**
39 * This function is used to copy an array of pipe_vertex_buffer structures,
40 * while properly referencing the pipe_vertex_buffer::buffer member.
41 *
42 * enabled_buffers is updated such that the bits corresponding to the indices
43 * of disabled buffers are set to 0 and the enabled ones are set to 1.
44 *
45 * \sa util_copy_framebuffer_state
46 */
util_set_vertex_buffers_mask(struct pipe_vertex_buffer * dst,uint32_t * enabled_buffers,const struct pipe_vertex_buffer * src,unsigned count,bool take_ownership)47 void util_set_vertex_buffers_mask(struct pipe_vertex_buffer *dst,
48 uint32_t *enabled_buffers,
49 const struct pipe_vertex_buffer *src,
50 unsigned count,
51 bool take_ownership)
52 {
53 unsigned last_count = util_last_bit(*enabled_buffers);
54 uint32_t bitmask = 0;
55 unsigned i = 0;
56
57 assert(!count || src);
58
59 if (src) {
60 for (; i < count; i++) {
61 if (src[i].buffer.resource)
62 bitmask |= 1 << i;
63
64 pipe_vertex_buffer_unreference(&dst[i]);
65
66 if (!take_ownership && !src[i].is_user_buffer)
67 pipe_resource_reference(&dst[i].buffer.resource, src[i].buffer.resource);
68 }
69
70 /* Copy over the other members of pipe_vertex_buffer. */
71 memcpy(dst, src, count * sizeof(struct pipe_vertex_buffer));
72 }
73
74 *enabled_buffers = bitmask;
75
76 for (; i < last_count; i++)
77 pipe_vertex_buffer_unreference(&dst[i]);
78 }
79
80 /**
81 * Same as util_set_vertex_buffers_mask, but it only returns the number
82 * of bound buffers.
83 */
util_set_vertex_buffers_count(struct pipe_vertex_buffer * dst,unsigned * dst_count,const struct pipe_vertex_buffer * src,unsigned count,bool take_ownership)84 void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,
85 unsigned *dst_count,
86 const struct pipe_vertex_buffer *src,
87 unsigned count,
88 bool take_ownership)
89 {
90 uint32_t enabled_buffers = 0;
91
92 for (unsigned i = 0; i < *dst_count; i++) {
93 if (dst[i].buffer.resource)
94 enabled_buffers |= (1ull << i);
95 }
96
97 util_set_vertex_buffers_mask(dst, &enabled_buffers, src, count,
98 take_ownership);
99
100 *dst_count = util_last_bit(enabled_buffers);
101 }
102
103 /**
104 * This function is used to copy an array of pipe_shader_buffer structures,
105 * while properly referencing the pipe_shader_buffer::buffer member.
106 *
107 * \sa util_set_vertex_buffer_mask
108 */
util_set_shader_buffers_mask(struct pipe_shader_buffer * dst,uint32_t * enabled_buffers,const struct pipe_shader_buffer * src,unsigned start_slot,unsigned count)109 void util_set_shader_buffers_mask(struct pipe_shader_buffer *dst,
110 uint32_t *enabled_buffers,
111 const struct pipe_shader_buffer *src,
112 unsigned start_slot, unsigned count)
113 {
114 unsigned i;
115
116 dst += start_slot;
117
118 if (src) {
119 for (i = 0; i < count; i++) {
120 pipe_resource_reference(&dst[i].buffer, src[i].buffer);
121
122 if (src[i].buffer)
123 *enabled_buffers |= (1ull << (start_slot + i));
124 else
125 *enabled_buffers &= ~(1ull << (start_slot + i));
126 }
127
128 /* Copy over the other members of pipe_shader_buffer. */
129 memcpy(dst, src, count * sizeof(struct pipe_shader_buffer));
130 }
131 else {
132 /* Unreference the buffers. */
133 for (i = 0; i < count; i++)
134 pipe_resource_reference(&dst[i].buffer, NULL);
135
136 *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
137 }
138 }
139
140 /**
141 * Given a user index buffer, save the structure to "saved", and upload it.
142 */
143 bool
util_upload_index_buffer(struct pipe_context * pipe,const struct pipe_draw_info * info,const struct pipe_draw_start_count_bias * draw,struct pipe_resource ** out_buffer,unsigned * out_offset,unsigned alignment)144 util_upload_index_buffer(struct pipe_context *pipe,
145 const struct pipe_draw_info *info,
146 const struct pipe_draw_start_count_bias *draw,
147 struct pipe_resource **out_buffer,
148 unsigned *out_offset, unsigned alignment)
149 {
150 unsigned start_offset = draw->start * info->index_size;
151
152 u_upload_data(pipe->stream_uploader, start_offset,
153 draw->count * info->index_size, alignment,
154 (char*)info->index.user + start_offset,
155 out_offset, out_buffer);
156 u_upload_unmap(pipe->stream_uploader);
157 *out_offset -= start_offset;
158 return *out_buffer != NULL;
159 }
160
161 /**
162 * Lower each UINT64 vertex element to 1 or 2 UINT32 vertex elements.
163 * 3 and 4 component formats are expanded into 2 slots.
164 *
165 * @param velems Original vertex elements, will be updated to contain
166 * the lowered vertex elements.
167 * @param velem_count Original count, will be updated to contain the count
168 * after lowering.
169 * @param tmp Temporary array of PIPE_MAX_ATTRIBS vertex elements.
170 */
171 void
util_lower_uint64_vertex_elements(const struct pipe_vertex_element ** velems,unsigned * velem_count,struct pipe_vertex_element tmp[PIPE_MAX_ATTRIBS])172 util_lower_uint64_vertex_elements(const struct pipe_vertex_element **velems,
173 unsigned *velem_count,
174 struct pipe_vertex_element tmp[PIPE_MAX_ATTRIBS])
175 {
176 const struct pipe_vertex_element *input = *velems;
177 unsigned count = *velem_count;
178 bool has_64bit = false;
179
180 for (unsigned i = 0; i < count; i++) {
181 has_64bit |= input[i].src_format >= PIPE_FORMAT_R64_UINT &&
182 input[i].src_format <= PIPE_FORMAT_R64G64B64A64_UINT;
183 }
184
185 /* Return the original vertex elements if there is nothing to do. */
186 if (!has_64bit)
187 return;
188
189 /* Lower 64_UINT to 32_UINT. */
190 unsigned new_count = 0;
191
192 for (unsigned i = 0; i < count; i++) {
193 enum pipe_format format = input[i].src_format;
194
195 /* If the shader input is dvec2 or smaller, reduce the number of
196 * components to 2 at most. If the shader input is dvec3 or larger,
197 * expand the number of components to 3 at least. If the 3rd component
198 * is out of bounds, the hardware shouldn't skip loading the first
199 * 2 components.
200 */
201 if (format >= PIPE_FORMAT_R64_UINT &&
202 format <= PIPE_FORMAT_R64G64B64A64_UINT) {
203 if (input[i].dual_slot)
204 format = MAX2(format, PIPE_FORMAT_R64G64B64_UINT);
205 else
206 format = MIN2(format, PIPE_FORMAT_R64G64_UINT);
207 }
208
209 switch (format) {
210 case PIPE_FORMAT_R64_UINT:
211 tmp[new_count] = input[i];
212 tmp[new_count].src_format = PIPE_FORMAT_R32G32_UINT;
213 new_count++;
214 break;
215
216 case PIPE_FORMAT_R64G64_UINT:
217 tmp[new_count] = input[i];
218 tmp[new_count].src_format = PIPE_FORMAT_R32G32B32A32_UINT;
219 new_count++;
220 break;
221
222 case PIPE_FORMAT_R64G64B64_UINT:
223 case PIPE_FORMAT_R64G64B64A64_UINT:
224 assert(new_count + 2 <= PIPE_MAX_ATTRIBS);
225 tmp[new_count] = tmp[new_count + 1] = input[i];
226 tmp[new_count].src_format = PIPE_FORMAT_R32G32B32A32_UINT;
227 tmp[new_count + 1].src_format =
228 format == PIPE_FORMAT_R64G64B64_UINT ?
229 PIPE_FORMAT_R32G32_UINT :
230 PIPE_FORMAT_R32G32B32A32_UINT;
231 tmp[new_count + 1].src_offset += 16;
232 new_count += 2;
233 break;
234
235 default:
236 tmp[new_count++] = input[i];
237 break;
238 }
239 }
240
241 *velem_count = new_count;
242 *velems = tmp;
243 }
244
245 /* This is a helper for hardware bring-up. Don't remove. */
246 struct pipe_query *
util_begin_pipestat_query(struct pipe_context * ctx)247 util_begin_pipestat_query(struct pipe_context *ctx)
248 {
249 struct pipe_query *q =
250 ctx->create_query(ctx, PIPE_QUERY_PIPELINE_STATISTICS, 0);
251 if (!q)
252 return NULL;
253
254 ctx->begin_query(ctx, q);
255 return q;
256 }
257
258 /* This is a helper for hardware bring-up. Don't remove. */
259 void
util_end_pipestat_query(struct pipe_context * ctx,struct pipe_query * q,FILE * f)260 util_end_pipestat_query(struct pipe_context *ctx, struct pipe_query *q,
261 FILE *f)
262 {
263 static unsigned counter;
264 struct pipe_query_data_pipeline_statistics stats;
265
266 ctx->end_query(ctx, q);
267 ctx->get_query_result(ctx, q, true, (void*)&stats);
268 ctx->destroy_query(ctx, q);
269
270 fprintf(f,
271 "Draw call %u:\n"
272 " ia_vertices = %"PRIu64"\n"
273 " ia_primitives = %"PRIu64"\n"
274 " vs_invocations = %"PRIu64"\n"
275 " gs_invocations = %"PRIu64"\n"
276 " gs_primitives = %"PRIu64"\n"
277 " c_invocations = %"PRIu64"\n"
278 " c_primitives = %"PRIu64"\n"
279 " ps_invocations = %"PRIu64"\n"
280 " hs_invocations = %"PRIu64"\n"
281 " ds_invocations = %"PRIu64"\n"
282 " cs_invocations = %"PRIu64"\n",
283 (unsigned)p_atomic_inc_return(&counter),
284 stats.ia_vertices,
285 stats.ia_primitives,
286 stats.vs_invocations,
287 stats.gs_invocations,
288 stats.gs_primitives,
289 stats.c_invocations,
290 stats.c_primitives,
291 stats.ps_invocations,
292 stats.hs_invocations,
293 stats.ds_invocations,
294 stats.cs_invocations);
295 }
296
297 /* This is a helper for profiling. Don't remove. */
298 struct pipe_query *
util_begin_time_query(struct pipe_context * ctx)299 util_begin_time_query(struct pipe_context *ctx)
300 {
301 struct pipe_query *q =
302 ctx->create_query(ctx, PIPE_QUERY_TIME_ELAPSED, 0);
303 if (!q)
304 return NULL;
305
306 ctx->begin_query(ctx, q);
307 return q;
308 }
309
310 /* This is a helper for profiling. Don't remove. */
311 void
util_end_time_query(struct pipe_context * ctx,struct pipe_query * q,FILE * f,const char * name)312 util_end_time_query(struct pipe_context *ctx, struct pipe_query *q, FILE *f,
313 const char *name)
314 {
315 union pipe_query_result result;
316
317 ctx->end_query(ctx, q);
318 ctx->get_query_result(ctx, q, true, &result);
319 ctx->destroy_query(ctx, q);
320
321 fprintf(f, "Time elapsed: %s - %"PRIu64".%u us\n", name, result.u64 / 1000, (unsigned)(result.u64 % 1000) / 100);
322 }
323
324 /* This is a helper for hardware bring-up. Don't remove. */
325 void
util_wait_for_idle(struct pipe_context * ctx)326 util_wait_for_idle(struct pipe_context *ctx)
327 {
328 struct pipe_fence_handle *fence = NULL;
329
330 ctx->flush(ctx, &fence, 0);
331 ctx->screen->fence_finish(ctx->screen, NULL, fence, OS_TIMEOUT_INFINITE);
332 }
333
334 void
util_throttle_init(struct util_throttle * t,uint64_t max_mem_usage)335 util_throttle_init(struct util_throttle *t, uint64_t max_mem_usage)
336 {
337 t->max_mem_usage = max_mem_usage;
338 }
339
340 void
util_throttle_deinit(struct pipe_screen * screen,struct util_throttle * t)341 util_throttle_deinit(struct pipe_screen *screen, struct util_throttle *t)
342 {
343 for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
344 screen->fence_reference(screen, &t->ring[i].fence, NULL);
345 }
346
347 static uint64_t
util_get_throttle_total_memory_usage(struct util_throttle * t)348 util_get_throttle_total_memory_usage(struct util_throttle *t)
349 {
350 uint64_t total_usage = 0;
351
352 for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
353 total_usage += t->ring[i].mem_usage;
354 return total_usage;
355 }
356
util_dump_throttle_ring(struct util_throttle * t)357 static void util_dump_throttle_ring(struct util_throttle *t)
358 {
359 printf("Throttle:\n");
360 for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++) {
361 printf(" ring[%u]: fence = %s, mem_usage = %"PRIu64"%s%s\n",
362 i, t->ring[i].fence ? "yes" : " no",
363 t->ring[i].mem_usage,
364 t->flush_index == i ? " [flush]" : "",
365 t->wait_index == i ? " [wait]" : "");
366 }
367 }
368
369 /**
370 * Notify util_throttle that the next operation allocates memory.
371 * util_throttle tracks memory usage and waits for fences until its tracked
372 * memory usage decreases.
373 *
374 * Example:
375 * util_throttle_memory_usage(..., w*h*d*Bpp);
376 * TexSubImage(..., w, h, d, ...);
377 *
378 * This means that TexSubImage can't allocate more memory its maximum limit
379 * set during initialization.
380 */
381 void
util_throttle_memory_usage(struct pipe_context * pipe,struct util_throttle * t,uint64_t memory_size)382 util_throttle_memory_usage(struct pipe_context *pipe,
383 struct util_throttle *t, uint64_t memory_size)
384 {
385 (void)util_dump_throttle_ring; /* silence warning */
386
387 if (!t->max_mem_usage)
388 return;
389
390 MESA_TRACE_FUNC();
391
392 struct pipe_screen *screen = pipe->screen;
393 struct pipe_fence_handle **fence = NULL;
394 unsigned ring_size = ARRAY_SIZE(t->ring);
395 uint64_t total = util_get_throttle_total_memory_usage(t);
396
397 /* If there is not enough memory, walk the list of fences and find
398 * the latest one that we need to wait for.
399 */
400 while (t->wait_index != t->flush_index &&
401 total && total + memory_size > t->max_mem_usage) {
402 assert(t->ring[t->wait_index].fence);
403
404 /* Release an older fence if we need to wait for a newer one. */
405 if (fence)
406 screen->fence_reference(screen, fence, NULL);
407
408 fence = &t->ring[t->wait_index].fence;
409 t->ring[t->wait_index].mem_usage = 0;
410 t->wait_index = (t->wait_index + 1) % ring_size;
411
412 total = util_get_throttle_total_memory_usage(t);
413 }
414
415 /* Wait for the fence to decrease memory usage. */
416 if (fence) {
417 screen->fence_finish(screen, pipe, *fence, OS_TIMEOUT_INFINITE);
418 screen->fence_reference(screen, fence, NULL);
419 }
420
421 /* Flush and get a fence if we've exhausted memory usage for the current
422 * slot.
423 */
424 if (t->ring[t->flush_index].mem_usage &&
425 t->ring[t->flush_index].mem_usage + memory_size >
426 t->max_mem_usage / (ring_size / 2)) {
427 struct pipe_fence_handle **fence =
428 &t->ring[t->flush_index].fence;
429
430 /* Expect that the current flush slot doesn't have a fence yet. */
431 assert(!*fence);
432
433 pipe->flush(pipe, fence, PIPE_FLUSH_ASYNC);
434 t->flush_index = (t->flush_index + 1) % ring_size;
435
436 /* Vacate the next slot if it's occupied. This should be rare. */
437 if (t->flush_index == t->wait_index) {
438 struct pipe_fence_handle **fence =
439 &t->ring[t->wait_index].fence;
440
441 t->ring[t->wait_index].mem_usage = 0;
442 t->wait_index = (t->wait_index + 1) % ring_size;
443
444 assert(*fence);
445 screen->fence_finish(screen, pipe, *fence, OS_TIMEOUT_INFINITE);
446 screen->fence_reference(screen, fence, NULL);
447 }
448
449 assert(!t->ring[t->flush_index].mem_usage);
450 assert(!t->ring[t->flush_index].fence);
451 }
452
453 t->ring[t->flush_index].mem_usage += memory_size;
454 }
455
456 void
util_sw_query_memory_info(struct pipe_screen * pscreen,struct pipe_memory_info * info)457 util_sw_query_memory_info(struct pipe_screen *pscreen,
458 struct pipe_memory_info *info)
459 {
460 /* Provide query_memory_info from CPU reported memory */
461 uint64_t size;
462
463 if (!os_get_available_system_memory(&size))
464 return;
465 info->avail_staging_memory = size / 1024;
466 if (!os_get_total_physical_memory(&size))
467 return;
468 info->total_staging_memory = size / 1024;
469 }
470
471 void
util_init_pipe_vertex_state(struct pipe_screen * screen,struct pipe_vertex_buffer * buffer,const struct pipe_vertex_element * elements,unsigned num_elements,struct pipe_resource * indexbuf,uint32_t full_velem_mask,struct pipe_vertex_state * state)472 util_init_pipe_vertex_state(struct pipe_screen *screen,
473 struct pipe_vertex_buffer *buffer,
474 const struct pipe_vertex_element *elements,
475 unsigned num_elements,
476 struct pipe_resource *indexbuf,
477 uint32_t full_velem_mask,
478 struct pipe_vertex_state *state)
479 {
480 assert(num_elements == util_bitcount(full_velem_mask));
481
482 pipe_reference_init(&state->reference, 1);
483 state->screen = screen;
484
485 pipe_vertex_buffer_reference(&state->input.vbuffer, buffer);
486 pipe_resource_reference(&state->input.indexbuf, indexbuf);
487 state->input.num_elements = num_elements;
488 for (unsigned i = 0; i < num_elements; i++)
489 state->input.elements[i] = elements[i];
490 state->input.full_velem_mask = full_velem_mask;
491 }
492
493 /**
494 * Clamp color value to format range.
495 */
496 union pipe_color_union
util_clamp_color(enum pipe_format format,const union pipe_color_union * color)497 util_clamp_color(enum pipe_format format,
498 const union pipe_color_union *color)
499 {
500 union pipe_color_union clamp_color = *color;
501 int i;
502
503 for (i = 0; i < 4; i++) {
504 uint8_t bits = util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, i);
505
506 if (!bits)
507 continue;
508
509 if (util_format_is_unorm(format))
510 clamp_color.f[i] = SATURATE(clamp_color.f[i]);
511 else if (util_format_is_snorm(format))
512 clamp_color.f[i] = CLAMP(clamp_color.f[i], -1.0, 1.0);
513 else if (util_format_is_pure_uint(format))
514 clamp_color.ui[i] = _mesa_unsigned_to_unsigned(clamp_color.ui[i], bits);
515 else if (util_format_is_pure_sint(format))
516 clamp_color.i[i] = _mesa_signed_to_signed(clamp_color.i[i], bits);
517 }
518
519 return clamp_color;
520 }
521
522 /*
523 * Some hardware does not use a distinct descriptor for images, so it is
524 * convenient for drivers to reuse their texture descriptor packing for shader
525 * images. This helper constructs a synthetic, non-reference counted
526 * pipe_sampler_view corresponding to a given pipe_image_view for drivers'
527 * internal convenience.
528 *
529 * The returned descriptor is "synthetic" in the sense that it is not reference
530 * counted and the context field is ignored. Otherwise it's complete.
531 */
532 struct pipe_sampler_view
util_image_to_sampler_view(struct pipe_image_view * v)533 util_image_to_sampler_view(struct pipe_image_view *v)
534 {
535 struct pipe_sampler_view out = {
536 .format = v->format,
537 .is_tex2d_from_buf = v->access & PIPE_IMAGE_ACCESS_TEX2D_FROM_BUFFER,
538 .target = v->resource->target,
539 .swizzle_r = PIPE_SWIZZLE_X,
540 .swizzle_g = PIPE_SWIZZLE_Y,
541 .swizzle_b = PIPE_SWIZZLE_Z,
542 .swizzle_a = PIPE_SWIZZLE_W,
543 .texture = v->resource,
544 };
545
546 if (out.target == PIPE_BUFFER) {
547 out.u.buf.offset = v->u.buf.offset;
548 out.u.buf.size = v->u.buf.size;
549 } else if (out.is_tex2d_from_buf) {
550 out.u.tex2d_from_buf.offset = v->u.tex2d_from_buf.offset;
551 out.u.tex2d_from_buf.row_stride = v->u.tex2d_from_buf.row_stride;
552 out.u.tex2d_from_buf.width = v->u.tex2d_from_buf.width;
553 out.u.tex2d_from_buf.height = v->u.tex2d_from_buf.height;
554 } else {
555 /* For a single layer view of a multilayer image, we need to swap in the
556 * non-layered texture target to match the texture instruction.
557 */
558 if (v->u.tex.single_layer_view) {
559 switch (out.target) {
560 case PIPE_TEXTURE_1D_ARRAY:
561 /* A single layer is a 1D image */
562 out.target = PIPE_TEXTURE_1D;
563 break;
564
565 case PIPE_TEXTURE_3D:
566 case PIPE_TEXTURE_CUBE:
567 case PIPE_TEXTURE_2D_ARRAY:
568 case PIPE_TEXTURE_CUBE_ARRAY:
569 /* A single layer/face is a 2D image.
570 *
571 * Note that OpenGL does not otherwise support 2D views of 3D.
572 * Drivers that use this helper must support that anyway.
573 */
574 out.target = PIPE_TEXTURE_2D;
575 break;
576
577 default:
578 /* Other texture targets already only have 1 layer, nothing to do */
579 break;
580 }
581 }
582
583 out.u.tex.first_layer = v->u.tex.first_layer;
584 out.u.tex.last_layer = v->u.tex.last_layer;
585
586 /* Single level only */
587 out.u.tex.first_level = v->u.tex.level;
588 out.u.tex.last_level = v->u.tex.level;
589 }
590
591 return out;
592 }
593