1 /*
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 * Authors: Marek Olšák <[email protected]>
4 * SPDX-License-Identifier: MIT
5 */
6
7 /**
8 * This file contains common screen and context structures and functions
9 * for r600g and radeonsi.
10 */
11
12 #ifndef R600_PIPE_COMMON_H
13 #define R600_PIPE_COMMON_H
14
15 #include <stdio.h>
16
17 #include "winsys/radeon_winsys.h"
18
19 #include "util/disk_cache.h"
20 #include "util/u_blitter.h"
21 #include "util/list.h"
22 #include "util/u_range.h"
23 #include "util/slab.h"
24 #include "util/u_suballoc.h"
25 #include "util/u_transfer.h"
26 #include "util/u_threaded_context.h"
27
28 #include "compiler/nir/nir.h"
29
30 struct u_log_context;
31 #define ATI_VENDOR_ID 0x1002
32
33 #define R600_RESOURCE_FLAG_TRANSFER (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
34 #define R600_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
35 #define R600_RESOURCE_FLAG_FORCE_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
36 #define R600_RESOURCE_FLAG_UNMAPPABLE (PIPE_RESOURCE_FLAG_DRV_PRIV << 4)
37
38 #define R600_CONTEXT_STREAMOUT_FLUSH (1u << 0)
39 /* Pipeline & streamout query controls. */
40 #define R600_CONTEXT_START_PIPELINE_STATS (1u << 1)
41 #define R600_CONTEXT_STOP_PIPELINE_STATS (1u << 2)
42 #define R600_CONTEXT_FLUSH_FOR_RENDER_COND (1u << 3)
43 #define R600_CONTEXT_PRIVATE_FLAG (1u << 4)
44
45 /* special primitive types */
46 #define R600_PRIM_RECTANGLE_LIST MESA_PRIM_COUNT
47
48 #define R600_NOT_QUERY 0xffffffff
49
50 /* Debug flags. */
51 #define DBG_VS (1 << PIPE_SHADER_VERTEX)
52 #define DBG_PS (1 << PIPE_SHADER_FRAGMENT)
53 #define DBG_GS (1 << PIPE_SHADER_GEOMETRY)
54 #define DBG_TCS (1 << PIPE_SHADER_TESS_CTRL)
55 #define DBG_TES (1 << PIPE_SHADER_TESS_EVAL)
56 #define DBG_CS (1 << PIPE_SHADER_COMPUTE)
57 #define DBG_ALL_SHADERS (DBG_FS - 1)
58 #define DBG_FS (1 << 6) /* fetch shader */
59 #define DBG_TEX (1 << 7)
60 #define DBG_NIR (1 << 8)
61 #define DBG_COMPUTE (1 << 9)
62 /* gap */
63 #define DBG_VM (1 << 11)
64 #define DBG_PREOPT_IR (1 << 15)
65 #define DBG_CHECK_IR (1 << 16)
66 /* gaps */
67 #define DBG_TEST_DMA (1 << 20)
68 /* Bits 21-31 are reserved for the r600g driver. */
69 /* features */
70 #define DBG_NO_ASYNC_DMA (1ull << 32)
71 #define DBG_NO_HYPERZ (1ull << 33)
72 #define DBG_NO_DISCARD_RANGE (1ull << 34)
73 #define DBG_NO_2D_TILING (1ull << 35)
74 #define DBG_NO_TILING (1ull << 36)
75 #define DBG_SWITCH_ON_EOP (1ull << 37)
76 #define DBG_FORCE_DMA (1ull << 38)
77 #define DBG_INFO (1ull << 40)
78 #define DBG_NO_WC (1ull << 41)
79 #define DBG_CHECK_VM (1ull << 42)
80 /* gap */
81 #define DBG_TEST_VMFAULT_CP (1ull << 51)
82 #define DBG_TEST_VMFAULT_SDMA (1ull << 52)
83 #define DBG_TEST_VMFAULT_SHADER (1ull << 53)
84
85 #define R600_MAP_BUFFER_ALIGNMENT 64
86 #define R600_MAX_VIEWPORTS 16
87
88 #define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024
89
/* Cache-coherency requirement for clear/copy operations: selects which
 * GPU caches must be flushed so the destination's consumers see the data. */
enum r600_coherency {
	R600_COHERENCY_NONE, /* no cache flushes needed */
	R600_COHERENCY_SHADER, /* writes must become visible to shaders */
	R600_COHERENCY_CB_META, /* writes must become visible to CB metadata (CMASK) */
};
95
96 struct r600_common_context;
97 struct r600_perfcounters;
98 struct tgsi_shader_info;
99 struct r600_qbo_state;
100
/* Only 32-bit buffer allocations are supported, gallium doesn't support more
 * at the moment.
 *
 * Base class of all r600/radeonsi GPU memory resources (buffers and
 * textures): wraps the winsys buffer object and tracks placement,
 * memory-usage accounting, and the CPU-visible validity range.
 */
struct r600_resource {
	struct threaded_resource b; /* base class (threaded-context resource) */

	/* Winsys objects. */
	struct pb_buffer_lean *buf; /* the underlying winsys buffer object */
	uint64_t gpu_address; /* GPU virtual address of the buffer */
	/* Memory usage if the buffer placement is optimal. */
	uint64_t vram_usage;
	uint64_t gart_usage;

	/* Resource properties. */
	uint64_t bo_size; /* allocation size in bytes */
	unsigned bo_alignment; /* allocation alignment in bytes */
	enum radeon_bo_domain domains; /* allowed placements (VRAM/GTT) */
	enum radeon_bo_flag flags; /* winsys allocation flags */
	unsigned bind_history; /* presumably a mask of past PIPE_BIND_* uses — verify at usage sites */

	/* The buffer range which is initialized (with a write transfer,
	 * streamout, DMA, or as a random access target). The rest of
	 * the buffer is considered invalid and can be mapped unsynchronized.
	 *
	 * This allows unsynchronized mapping of a buffer range which hasn't
	 * been used yet. It's for applications which forget to use
	 * the unsynchronized map flag and expect the driver to figure it out.
	 */
	struct util_range valid_buffer_range;

	/* Whether the resource has been exported via resource_get_handle. */
	unsigned external_usage; /* PIPE_HANDLE_USAGE_* */

	/* Whether this resource is referenced by bindless handles. */
	bool texture_handle_allocated;
	bool image_handle_allocated;
	bool compute_global_bo; /* NOTE(review): semantics inferred from name (compute-global buffer) — confirm */

	/*
	 * EG/Cayman only - for RAT operations hw need an immediate buffer
	 * to store results in.
	 */
	struct r600_resource *immed_buffer;
};
145
/* Per-map transfer bookkeeping: the threaded-context transfer plus an
 * optional staging resource when the target can't be mapped directly. */
struct r600_transfer {
	struct threaded_transfer b; /* base class */
	struct r600_resource *staging; /* staging copy used for the map, or NULL */
};
150
/* Layout of the FMASK (MSAA color fragment mask) metadata surface. */
struct r600_fmask_info {
	uint64_t offset; /* byte offset of FMASK within the resource */
	uint64_t size; /* size in bytes */
	unsigned alignment; /* required alignment in bytes */
	unsigned pitch_in_pixels;
	unsigned bank_height;
	unsigned slice_tile_max;
	unsigned tile_mode_index;
	unsigned tile_swizzle;
};
161
/* Layout of the CMASK (color metadata for fast clear) surface. */
struct r600_cmask_info {
	uint64_t offset; /* byte offset of CMASK within the resource */
	uint64_t size; /* size in bytes */
	unsigned alignment; /* required alignment in bytes */
	unsigned slice_tile_max;
	uint64_t base_address_reg;
};
169
/* A texture resource: the hw surface plus metadata for depth/stencil
 * handling, MSAA compression (FMASK/CMASK), HTILE, and fast clears. */
struct r600_texture {
	struct r600_resource resource; /* base class */

	uint64_t size; /* total surface size in bytes */
	unsigned num_level0_transfers; /* number of mapped transfers of level 0 */
	enum pipe_format db_render_format; /* format used when bound to the DB */
	bool is_depth; /* whether this is a depth(-stencil) texture */
	bool db_compatible; /* whether the format can be bound to the DB */
	bool can_sample_z; /* depth is sampleable without decompression */
	bool can_sample_s; /* stencil is sampleable without decompression */
	unsigned dirty_level_mask; /* each bit says if that mipmap is compressed */
	unsigned stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */
	struct r600_texture *flushed_depth_texture; /* decompressed copy used for sampling, or NULL */
	struct radeon_surf surface; /* hw surface layout */

	/* Colorbuffer compression and fast clear. */
	struct r600_fmask_info fmask;
	struct r600_cmask_info cmask;
	struct r600_resource *cmask_buffer;
	unsigned cb_color_info; /* fast clear enable bit */
	unsigned color_clear_value[2];
	unsigned last_msaa_resolve_target_micro_mode;

	/* Depth buffer compression and fast clear. */
	uint64_t htile_offset; /* byte offset of HTILE within the resource; 0 = no HTILE */
	bool depth_cleared; /* if it was cleared at least once */
	float depth_clear_value;
	bool stencil_cleared; /* if it was cleared at least once */
	uint8_t stencil_clear_value;

	bool non_disp_tiling; /* R600-Cayman only */

	/* Counter that should be non-zero if the texture is bound to a
	 * framebuffer. Implemented in radeonsi only.
	 */
	uint32_t framebuffers_bound;
};
207
/* A pipe_surface subclass carrying precomputed CB/DB register values used
 * when the surface is bound as a color or depth-stencil target. */
struct r600_surface {
	struct pipe_surface base; /* base class */

	/* These can vary with block-compressed textures. */
	unsigned width0;
	unsigned height0;

	bool color_initialized; /* whether the CB state below has been computed */
	bool depth_initialized; /* whether the DB state below has been computed */

	/* Misc. color flags. */
	bool alphatest_bypass;
	bool export_16bpc;
	bool color_is_int8;
	bool color_is_int10;

	/* Color registers. */
	unsigned cb_color_info;
	unsigned cb_color_base;
	unsigned cb_color_view;
	unsigned cb_color_size; /* R600 only */
	unsigned cb_color_dim; /* EG only */
	unsigned cb_color_pitch; /* EG and later */
	unsigned cb_color_slice; /* EG and later */
	unsigned cb_color_attrib; /* EG and later */
	unsigned cb_color_fmask; /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
	unsigned cb_color_fmask_slice; /* EG and later */
	unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */
	unsigned cb_color_mask; /* R600 only */
	struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
	struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */

	/* DB registers. */
	uint64_t db_depth_base; /* DB_Z_READ/WRITE_BASE (EG and later) or DB_DEPTH_BASE (r600) */
	uint64_t db_stencil_base; /* EG and later */
	uint64_t db_htile_data_base;
	unsigned db_depth_info; /* R600 only, then SI and later */
	unsigned db_z_info; /* EG and later */
	unsigned db_depth_view;
	unsigned db_depth_size;
	unsigned db_depth_slice; /* EG and later */
	unsigned db_stencil_info; /* EG and later */
	unsigned db_prefetch_limit; /* R600 only */
	unsigned db_htile_surface;
	unsigned db_preload_control; /* EG and later */
};
254
/* Accumulated busy/idle sample counts for one hw block, used by the GPU
 * load thread to derive a utilization percentage. */
struct r600_mmio_counter {
	unsigned busy;
	unsigned idle;
};
259
/* All GPU-load counters, addressable either by name or as a flat array
 * (the union overlay lets generic code iterate over every counter). */
union r600_mmio_counters {
	struct r600_mmio_counters_named {
		/* For global GPU load including SDMA. */
		struct r600_mmio_counter gpu;

		/* GRBM_STATUS */
		struct r600_mmio_counter spi;
		struct r600_mmio_counter gui;
		struct r600_mmio_counter ta;
		struct r600_mmio_counter gds;
		struct r600_mmio_counter vgt;
		struct r600_mmio_counter ia;
		struct r600_mmio_counter sx;
		struct r600_mmio_counter wd;
		struct r600_mmio_counter bci;
		struct r600_mmio_counter sc;
		struct r600_mmio_counter pa;
		struct r600_mmio_counter db;
		struct r600_mmio_counter cp;
		struct r600_mmio_counter cb;

		/* SRBM_STATUS2 */
		struct r600_mmio_counter sdma;

		/* CP_STAT */
		struct r600_mmio_counter pfp;
		struct r600_mmio_counter meq;
		struct r600_mmio_counter me;
		struct r600_mmio_counter surf_sync;
		struct r600_mmio_counter cp_dma;
		struct r600_mmio_counter scratch_ram;
	} named;
	/* Flat view of the struct above, one element per unsigned field. */
	unsigned array[sizeof(struct r600_mmio_counters_named) / sizeof(unsigned)];
};
294
/* An imported memory object (pipe_memory_object subclass) wrapping a
 * winsys buffer plus the layout info needed to create resources from it. */
struct r600_memory_object {
	struct pipe_memory_object b; /* base class */
	struct pb_buffer_lean *buf; /* the imported winsys buffer */
	uint32_t stride; /* row stride in bytes */
	uint32_t offset; /* byte offset into the buffer */
};
301
/* State shared by all contexts created from one screen: winsys handle,
 * chip identification, debug flags, shader disk cache, GPU-load thread,
 * perf counters and cross-context dirty-state counters. */
struct r600_common_screen {
	struct pipe_screen b; /* base class */
	struct radeon_winsys *ws;
	enum radeon_family family;
	enum amd_gfx_level gfx_level;
	struct radeon_info info; /* chip/kernel capabilities from the winsys */
	uint64_t debug_flags; /* mask of DBG_* flags */
	bool has_cp_dma;
	bool has_streamout;

	struct disk_cache *disk_shader_cache;

	struct slab_parent_pool pool_transfers; /* parent of per-context transfer pools */

	/* Texture filter settings. */
	int force_aniso; /* -1 = disabled */

	/* Auxiliary context. Mainly used to initialize resources.
	 * It must be locked prior to using and flushed before unlocking. */
	struct pipe_context *aux_context;
	mtx_t aux_context_lock;

	/* This must be in the screen, because UE4 uses one context for
	 * compilation and another one for rendering.
	 */
	unsigned num_compilations;
	/* Along with ST_DEBUG=precompile, this should show if applications
	 * are loading shaders on demand. This is a monotonic counter.
	 */
	unsigned num_shaders_created;
	unsigned num_shader_cache_hits;

	/* GPU load thread. */
	mtx_t gpu_load_mutex;
	thrd_t gpu_load_thread;
	bool gpu_load_thread_created;
	union r600_mmio_counters mmio_counters;
	volatile unsigned gpu_load_stop_thread; /* bool */

	char renderer_string[100];

	/* Performance counters. */
	struct r600_perfcounters *perfcounters;

	/* If pipe_screen wants to recompute and re-emit the framebuffer,
	 * sampler, and image states of all contexts, it should atomically
	 * increment this.
	 *
	 * Each context will compare this with its own last known value of
	 * the counter before drawing and re-emit the states accordingly.
	 */
	unsigned dirty_tex_counter;

	/* Atomically increment this counter when an existing texture's
	 * metadata is enabled or disabled in a way that requires changing
	 * contexts' compressed texture binding masks.
	 */
	unsigned compressed_colortex_counter;

	struct {
		/* Context flags to set so that all writes from earlier jobs
		 * in the CP are seen by L2 clients.
		 */
		unsigned cp_to_L2;

		/* Context flags to set so that all writes from earlier jobs
		 * that end in L2 are seen by CP.
		 */
		unsigned L2_to_cp;

		/* Context flags to set so that all writes from earlier
		 * compute jobs are seen by L2 clients.
		 */
		unsigned compute_to_L2;
	} barrier_flags;

	/* NIR compiler options; the _fs variant is used for fragment shaders. */
	struct nir_shader_compiler_options nir_options;
	struct nir_shader_compiler_options nir_options_fs;
};
381
/* This encapsulates a state or an operation which can be emitted into the GPU
 * command stream. */
struct r600_atom {
	void (*emit)(struct r600_common_context *ctx, struct r600_atom *state); /* writes the state to the CS */
	unsigned num_dw; /* number of dwords the emit callback needs */
	unsigned short id; /* atom identifier, driver-specific */
};
389
/* Stream-output target: a pipe_stream_output_target plus the GPU buffer
 * tracking BUFFER_FILLED_SIZE for append operations. */
struct r600_so_target {
	struct pipe_stream_output_target b; /* base class */

	/* The buffer where BUFFER_FILLED_SIZE is stored. */
	struct r600_resource *buf_filled_size;
	unsigned buf_filled_size_offset; /* byte offset of the value within buf_filled_size */
	bool buf_filled_size_valid; /* whether the stored value is valid */

	unsigned stride_in_dw; /* vertex stride of this buffer in dwords */
};
400
/* All streamout (transform feedback) state of a context, including the
 * begin/enable atoms and suspend/resume bookkeeping between IBs. */
struct r600_streamout {
	struct r600_atom begin_atom; /* emits the streamout-begin packets */
	bool begin_emitted;
	unsigned num_dw_for_end; /* CS space to reserve for streamout-end */

	unsigned enabled_mask; /* mask of bound targets */
	unsigned num_targets;
	struct r600_so_target *targets[PIPE_MAX_SO_BUFFERS];

	unsigned append_bitmask; /* targets that append rather than restart */
	bool suspended; /* paused (e.g. across an IB flush or u_blitter) */

	/* External state which comes from the vertex shader,
	 * it must be set explicitly when binding a shader. */
	uint16_t *stride_in_dw;
	unsigned enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */

	/* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */
	unsigned hw_enabled_mask;

	/* The state of VGT_STRMOUT_(CONFIG|EN). */
	struct r600_atom enable_atom;
	bool streamout_enabled;
	bool prims_gen_query_enabled;
	int num_prims_gen_queries;
};
427
/* A scissor rectangle with signed coordinates (viewports can produce
 * negative bounds before clamping, unlike pipe_scissor_state). */
struct r600_signed_scissor {
	int minx;
	int miny;
	int maxx;
	int maxy;
};
434
/* All scissor states of a context plus the atom that re-emits them. */
struct r600_scissors {
	struct r600_atom atom;
	unsigned dirty_mask; /* one bit per scissor needing re-emit */
	struct pipe_scissor_state states[R600_MAX_VIEWPORTS];
};
440
/* All viewport states of a context, their derived scissors, and the atom
 * that re-emits them. */
struct r600_viewports {
	struct r600_atom atom;
	unsigned dirty_mask; /* one bit per viewport needing re-emit */
	unsigned depth_range_dirty_mask; /* one bit per depth range needing re-emit */
	struct pipe_viewport_state states[R600_MAX_VIEWPORTS];
	struct r600_signed_scissor as_scissor[R600_MAX_VIEWPORTS]; /* viewports converted to scissors */
};
448
/* A command stream (gfx or dma) together with its flush callback. */
struct r600_ring {
	struct radeon_cmdbuf cs;
	void (*flush)(void *ctx, unsigned flags,
		      struct pipe_fence_handle **fence);
};
454
/* Saved CS data for debugging features (see radeon_save_cs): a copy of the
 * IB contents and optionally a snapshot of the buffer list. */
struct radeon_saved_cs {
	uint32_t *ib; /* copy of the IB, owned by this struct */
	unsigned num_dw; /* size of the IB copy in dwords */

	struct radeon_bo_list_item *bo_list; /* buffer-list snapshot, may be NULL */
	unsigned bo_count;
};
463
/* Per-context state shared by r600g and radeonsi: rings, fences, memory
 * accounting, common draw/stream/query state, statistics, and the virtual
 * functions each driver fills in. */
struct r600_common_context {
	struct pipe_context b; /* base class */

	struct r600_common_screen *screen;
	struct radeon_winsys *ws;
	struct radeon_winsys_ctx *ctx;
	enum radeon_family family;
	enum amd_gfx_level gfx_level;
	struct r600_ring gfx; /* graphics command stream */
	struct r600_ring dma; /* async DMA command stream */
	struct pipe_fence_handle *last_gfx_fence;
	struct pipe_fence_handle *last_sdma_fence;
	struct r600_resource *eop_bug_scratch; /* scratch buffer for an EOP hw bug workaround */
	unsigned num_gfx_cs_flushes;
	unsigned initial_gfx_cs_size;
	unsigned last_dirty_tex_counter; /* last seen screen->dirty_tex_counter */
	unsigned last_compressed_colortex_counter; /* last seen screen->compressed_colortex_counter */
	unsigned last_num_draw_calls;

	struct threaded_context *tc;
	struct u_suballocator allocator_zeroed_memory;
	struct slab_child_pool pool_transfers;
	struct slab_child_pool pool_transfers_unsync; /* for threaded_context */

	/* Current unaccounted memory usage. */
	uint64_t vram;
	uint64_t gtt;

	/* States. */
	struct r600_streamout streamout;
	struct r600_scissors scissors;
	struct r600_viewports viewports;
	bool scissor_enabled;
	bool clip_halfz;
	bool vs_writes_viewport_index;
	bool vs_disables_clipping_viewport;

	/* Additional context states. */
	unsigned flags; /* flush flags */

	/* Queries. */
	/* Maintain the list of active queries for pausing between IBs. */
	int num_occlusion_queries;
	int num_perfect_occlusion_queries;
	struct list_head active_queries;
	unsigned num_cs_dw_queries_suspend;
	/* Misc stats. */
	unsigned num_draw_calls;
	unsigned num_decompress_calls;
	unsigned num_mrt_draw_calls;
	unsigned num_prim_restart_calls;
	unsigned num_spill_draw_calls;
	unsigned num_compute_calls;
	unsigned num_spill_compute_calls;
	unsigned num_dma_calls;
	unsigned num_cp_dma_calls;
	unsigned num_vs_flushes;
	unsigned num_ps_flushes;
	unsigned num_cs_flushes;
	unsigned num_cb_cache_flushes;
	unsigned num_db_cache_flushes;
	unsigned num_resident_handles;
	uint64_t num_alloc_tex_transfer_bytes;

	/* Render condition. */
	struct r600_atom render_cond_atom;
	struct pipe_query *render_cond;
	unsigned render_cond_mode;
	bool render_cond_invert;
	bool render_cond_force_off; /* for u_blitter */

	/* MSAA sample locations.
	 * The first index is the sample index.
	 * The second index is the coordinate: X, Y. */
	float sample_locations_1x[1][2];
	float sample_locations_2x[2][2];
	float sample_locations_4x[4][2];
	float sample_locations_8x[8][2];
	float sample_locations_16x[16][2];

	struct util_debug_callback debug;
	struct pipe_device_reset_callback device_reset_callback;
	struct u_log_context *log;

	void *query_result_shader; /* compute shader for query result conversion */

	/* Copy one resource to another using async DMA. */
	void (*dma_copy)(struct pipe_context *ctx,
			 struct pipe_resource *dst,
			 unsigned dst_level,
			 unsigned dst_x, unsigned dst_y, unsigned dst_z,
			 struct pipe_resource *src,
			 unsigned src_level,
			 const struct pipe_box *src_box);

	/* Clear a buffer range using async DMA. */
	void (*dma_clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
				 uint64_t offset, uint64_t size, unsigned value);

	/* Clear a buffer range with the requested cache-coherency behavior. */
	void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
			     uint64_t offset, uint64_t size, unsigned value,
			     enum r600_coherency coher);

	/* Decompress the given depth levels/layers/samples by blitting,
	 * optionally into a separate staging texture. */
	void (*blit_decompress_depth)(struct pipe_context *ctx,
				      struct r600_texture *texture,
				      struct r600_texture *staging,
				      unsigned first_level, unsigned last_level,
				      unsigned first_layer, unsigned last_layer,
				      unsigned first_sample, unsigned last_sample);

	/* Reallocate the buffer and update all resource bindings where
	 * the buffer is bound, including all resource descriptors. */
	void (*invalidate_buffer)(struct pipe_context *ctx, struct pipe_resource *buf);

	/* Update all resource bindings where the buffer is bound, including
	 * all resource descriptors. This is invalidate_buffer without
	 * the invalidation. */
	void (*rebind_buffer)(struct pipe_context *ctx, struct pipe_resource *buf,
			      uint64_t old_gpu_address);

	/* Save query-buffer-object state into *st (see r600_qbo_state). */
	void (*save_qbo_state)(struct pipe_context *ctx, struct r600_qbo_state *st);

	/* This ensures there is enough space in the command stream. */
	void (*need_gfx_cs_space)(struct pipe_context *ctx, unsigned num_dw,
				  bool include_draw_vbo);

	/* Mark a state atom as dirty/clean for the next draw. */
	void (*set_atom_dirty)(struct r600_common_context *ctx,
			       struct r600_atom *atom, bool dirty);

	/* Debug: detect and report VM faults from a saved CS. */
	void (*check_vm_faults)(struct r600_common_context *ctx,
				struct radeon_saved_cs *saved,
				enum amd_ip_type ring);
};
596
597 /* r600_buffer_common.c */
598 bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
599 struct pb_buffer_lean *buf,
600 unsigned usage);
601 void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
602 struct r600_resource *resource,
603 unsigned usage);
604 void r600_buffer_subdata(struct pipe_context *ctx,
605 struct pipe_resource *buffer,
606 unsigned usage, unsigned offset,
607 unsigned size, const void *data);
608 void r600_init_resource_fields(struct r600_common_screen *rscreen,
609 struct r600_resource *res,
610 uint64_t size, unsigned alignment);
611 bool r600_alloc_resource(struct r600_common_screen *rscreen,
612 struct r600_resource *res);
613 void r600_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *buf);
614 void r600_buffer_flush_region(struct pipe_context *ctx,
615 struct pipe_transfer *transfer,
616 const struct pipe_box *rel_box);
617 struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
618 const struct pipe_resource *templ,
619 unsigned alignment);
620 struct pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen,
621 unsigned flags,
622 unsigned usage,
623 unsigned size,
624 unsigned alignment);
625 struct pipe_resource *
626 r600_buffer_from_user_memory(struct pipe_screen *screen,
627 const struct pipe_resource *templ,
628 void *user_memory);
629 void
630 r600_invalidate_resource(struct pipe_context *ctx,
631 struct pipe_resource *resource);
632 void r600_replace_buffer_storage(struct pipe_context *ctx,
633 struct pipe_resource *dst,
634 struct pipe_resource *src);
635 void *r600_buffer_transfer_map(struct pipe_context *ctx,
636 struct pipe_resource *resource,
637 unsigned level,
638 unsigned usage,
639 const struct pipe_box *box,
640 struct pipe_transfer **ptransfer);
641 void r600_buffer_transfer_unmap(struct pipe_context *ctx,
642 struct pipe_transfer *transfer);
643
644 /* r600_common_pipe.c */
645 void r600_gfx_write_event_eop(struct r600_common_context *ctx,
646 unsigned event, unsigned event_flags,
647 unsigned data_sel,
648 struct r600_resource *buf, uint64_t va,
649 uint32_t new_fence, unsigned query_type);
650 unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen);
651 void r600_gfx_wait_fence(struct r600_common_context *ctx,
652 struct r600_resource *buf,
653 uint64_t va, uint32_t ref, uint32_t mask);
654 void r600_draw_rectangle(struct blitter_context *blitter,
655 void *vertex_elements_cso,
656 blitter_get_vs_func get_vs,
657 int x1, int y1, int x2, int y2,
658 float depth, unsigned num_instances,
659 enum blitter_attrib_type type,
660 const union blitter_attrib *attrib);
661 bool r600_common_screen_init(struct r600_common_screen *rscreen,
662 struct radeon_winsys *ws);
663 void r600_destroy_common_screen(struct r600_common_screen *rscreen);
664 void r600_preflush_suspend_features(struct r600_common_context *ctx);
665 void r600_postflush_resume_features(struct r600_common_context *ctx);
666 bool r600_common_context_init(struct r600_common_context *rctx,
667 struct r600_common_screen *rscreen,
668 unsigned context_flags);
669 void r600_common_context_cleanup(struct r600_common_context *rctx);
670 bool r600_can_dump_shader(struct r600_common_screen *rscreen,
671 unsigned processor);
672 bool r600_extra_shader_checks(struct r600_common_screen *rscreen,
673 unsigned processor);
674 void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
675 uint64_t offset, uint64_t size, unsigned value);
676 struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
677 const struct pipe_resource *templ);
678 const char *r600_get_llvm_processor_name(enum radeon_family family);
679 void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
680 struct r600_resource *dst, struct r600_resource *src);
681 void radeon_save_cs(struct radeon_winsys *ws, struct radeon_cmdbuf *cs,
682 struct radeon_saved_cs *saved, bool get_buffer_list);
683 void radeon_clear_saved_cs(struct radeon_saved_cs *saved);
684 bool r600_check_device_reset(struct r600_common_context *rctx);
685
686 /* r600_gpu_load.c */
687 void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen);
688 uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type);
689 unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type,
690 uint64_t begin);
691
692 /* r600_perfcounters.c */
693 void r600_perfcounters_destroy(struct r600_common_screen *rscreen);
694
695 /* r600_query.c */
696 void r600_init_screen_query_functions(struct r600_common_screen *rscreen);
697 void r600_query_init(struct r600_common_context *rctx);
698 void r600_suspend_queries(struct r600_common_context *ctx);
699 void r600_resume_queries(struct r600_common_context *ctx);
700 void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen);
701
702 /* r600_streamout.c */
703 void r600_streamout_buffers_dirty(struct r600_common_context *rctx);
704 void r600_set_streamout_targets(struct pipe_context *ctx,
705 unsigned num_targets,
706 struct pipe_stream_output_target **targets,
707 const unsigned *offset);
708 void r600_emit_streamout_end(struct r600_common_context *rctx);
709 void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
710 unsigned type, int diff);
711 void r600_streamout_init(struct r600_common_context *rctx);
712
713 /* r600_test_dma.c */
714 void r600_test_dma(struct r600_common_screen *rscreen);
715
716 /* r600_texture.c */
717 bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
718 struct r600_texture *rdst,
719 unsigned dst_level, unsigned dstx,
720 unsigned dsty, unsigned dstz,
721 struct r600_texture *rsrc,
722 unsigned src_level,
723 const struct pipe_box *src_box);
724 void r600_texture_destroy(struct pipe_screen *screen, struct pipe_resource *ptex);
725 void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
726 struct r600_texture *rtex,
727 unsigned nr_samples,
728 struct r600_fmask_info *out);
729 void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
730 struct r600_texture *rtex,
731 struct r600_cmask_info *out);
732 bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
733 struct pipe_resource *texture,
734 struct r600_texture **staging);
735 void r600_print_texture_info(struct r600_common_screen *rscreen,
736 struct r600_texture *rtex, struct u_log_context *log);
737 struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
738 const struct pipe_resource *templ);
739 struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
740 struct pipe_resource *texture,
741 const struct pipe_surface *templ,
742 unsigned width0, unsigned height0,
743 unsigned width, unsigned height);
744 unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap);
745 void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
746 struct pipe_framebuffer_state *fb,
747 struct r600_atom *fb_state,
748 unsigned *buffers, uint8_t *dirty_cbufs,
749 const union pipe_color_union *color);
750 void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
751 void r600_init_context_texture_functions(struct r600_common_context *rctx);
752 void eg_resource_alloc_immed(struct r600_common_screen *rscreen,
753 struct r600_resource *res,
754 unsigned immed_size);
755 void *r600_texture_transfer_map(struct pipe_context *ctx,
756 struct pipe_resource *texture,
757 unsigned level,
758 unsigned usage,
759 const struct pipe_box *box,
760 struct pipe_transfer **ptransfer);
761 void r600_texture_transfer_unmap(struct pipe_context *ctx,
762 struct pipe_transfer* transfer);
763
764 /* r600_viewport.c */
765 void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx,
766 struct pipe_scissor_state *scissor);
767 void r600_viewport_set_rast_deps(struct r600_common_context *rctx,
768 bool scissor_enable, bool clip_halfz);
769 void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
770 struct tgsi_shader_info *info);
771 void r600_init_viewport_functions(struct r600_common_context *rctx);
772
773 /* cayman_msaa.c */
774 extern const uint32_t eg_sample_locs_2x[4];
775 extern const unsigned eg_max_dist_2x;
776 extern const uint32_t eg_sample_locs_4x[4];
777 extern const unsigned eg_max_dist_4x;
778 void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
779 unsigned sample_index, float *out_value);
780 void cayman_init_msaa(struct pipe_context *ctx);
781 void cayman_emit_msaa_state(struct radeon_cmdbuf *cs, int nr_samples,
782 int ps_iter_samples, int overrast_samples);
783
784
785 /* Inline helpers. */
786
/* Downcast a gallium pipe_resource to the r600 resource wrapper. */
static inline struct r600_resource *r600_resource(struct pipe_resource *r)
{
	struct r600_resource *res = (struct r600_resource *)r;
	return res;
}
791
792 static inline void
r600_resource_reference(struct r600_resource ** ptr,struct r600_resource * res)793 r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res)
794 {
795 pipe_resource_reference((struct pipe_resource **)ptr,
796 (struct pipe_resource *)res);
797 }
798
799 static inline void
r600_texture_reference(struct r600_texture ** ptr,struct r600_texture * res)800 r600_texture_reference(struct r600_texture **ptr, struct r600_texture *res)
801 {
802 pipe_resource_reference((struct pipe_resource **)ptr, &res->resource.b.b);
803 }
804
805 static inline void
r600_context_add_resource_size(struct pipe_context * ctx,struct pipe_resource * r)806 r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
807 {
808 struct r600_common_context *rctx = (struct r600_common_context *)ctx;
809 struct r600_resource *res = (struct r600_resource *)r;
810
811 if (res) {
812 /* Add memory usage for need_gfx_cs_space */
813 rctx->vram += res->vram_usage;
814 rctx->gtt += res->gart_usage;
815 }
816 }
817
r600_get_strmout_en(struct r600_common_context * rctx)818 static inline bool r600_get_strmout_en(struct r600_common_context *rctx)
819 {
820 return rctx->streamout.streamout_enabled ||
821 rctx->streamout.prims_gen_query_enabled;
822 }
823
824 #define SQ_TEX_XY_FILTER_POINT 0x00
825 #define SQ_TEX_XY_FILTER_BILINEAR 0x01
826 #define SQ_TEX_XY_FILTER_ANISO_POINT 0x02
827 #define SQ_TEX_XY_FILTER_ANISO_BILINEAR 0x03
828
eg_tex_filter(unsigned filter,unsigned max_aniso)829 static inline unsigned eg_tex_filter(unsigned filter, unsigned max_aniso)
830 {
831 if (filter == PIPE_TEX_FILTER_LINEAR)
832 return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_BILINEAR
833 : SQ_TEX_XY_FILTER_BILINEAR;
834 else
835 return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_POINT
836 : SQ_TEX_XY_FILTER_POINT;
837 }
838
/* Map a max-anisotropy value to the hw log2 setting:
 * <2 -> 0 (off), <4 -> 1 (2x), <8 -> 2 (4x), <16 -> 3 (8x), else 4 (16x). */
static inline unsigned r600_tex_aniso_filter(unsigned filter)
{
	unsigned level;

	for (level = 0; level < 4; level++) {
		if (filter < (2u << level))
			return level;
	}
	return 4;
}
851
r600_wavefront_size(enum radeon_family family)852 static inline unsigned r600_wavefront_size(enum radeon_family family)
853 {
854 switch (family) {
855 case CHIP_RV610:
856 case CHIP_RS780:
857 case CHIP_RV620:
858 case CHIP_RS880:
859 return 16;
860 case CHIP_RV630:
861 case CHIP_RV635:
862 case CHIP_RV730:
863 case CHIP_RV710:
864 case CHIP_PALM:
865 case CHIP_CEDAR:
866 return 32;
867 default:
868 return 64;
869 }
870 }
871
872 static inline unsigned
r600_get_sampler_view_priority(struct r600_resource * res)873 r600_get_sampler_view_priority(struct r600_resource *res)
874 {
875 if (res->b.b.target == PIPE_BUFFER)
876 return RADEON_PRIO_SAMPLER_BUFFER;
877
878 if (res->b.b.nr_samples > 1)
879 return RADEON_PRIO_SAMPLER_TEXTURE_MSAA;
880
881 return RADEON_PRIO_SAMPLER_TEXTURE;
882 }
883
884 static inline bool
r600_can_sample_zs(struct r600_texture * tex,bool stencil_sampler)885 r600_can_sample_zs(struct r600_texture *tex, bool stencil_sampler)
886 {
887 return (stencil_sampler && tex->can_sample_s) ||
888 (!stencil_sampler && tex->can_sample_z);
889 }
890
891 static inline bool
r600_htile_enabled(struct r600_texture * tex,unsigned level)892 r600_htile_enabled(struct r600_texture *tex, unsigned level)
893 {
894 return tex->htile_offset && level == 0;
895 }
896
/* Print compute debug output to stderr when DBG_COMPUTE is set.
 * Note: no semicolon after while (0) — the original trailing semicolon
 * defeated the do/while(0) idiom and broke uses like
 * "if (x) COMPUTE_DBG(...); else ...". */
#define COMPUTE_DBG(rscreen, fmt, args...) \
	do { \
		if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \
	} while (0)
901
/* Print an error message to stderr prefixed with file, line, and function. */
#define R600_ERR(fmt, args...) \
	fprintf(stderr, "EE %s:%d %s - " fmt, __FILE__, __LINE__, __func__, ##args)
904
/* For MSAA sample positions: pack eight signed 4-bit sample coordinates
 * (x/y of samples 0-3) into one 32-bit register value, 4 bits each from
 * LSB to MSB. Every argument is cast to unsigned before masking so that
 * negative coordinates behave identically for all eight fields (the
 * original omitted the cast on s0x only). */
#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \
	(((unsigned)(s0x) & 0xf) | (((unsigned)(s0y) & 0xf) << 4) | \
	 (((unsigned)(s1x) & 0xf) << 8) | (((unsigned)(s1y) & 0xf) << 12) | \
	 (((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) | \
	 (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28))
911
/* Convert a float to signed fixed point with frac_bits fractional bits
 * (truncating toward zero, like the float-to-int conversion). */
static inline int S_FIXED(float value, unsigned frac_bits)
{
	float scaled = value * (float)(1 << frac_bits);
	return (int)scaled;
}
916
917 #endif
918