xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r600/r600_pipe_common.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  * Authors: Marek Olšák <[email protected]>
4  * SPDX-License-Identifier: MIT
5  */
6 
7 /**
8  * This file contains common screen and context structures and functions
9  * for r600g and radeonsi.
10  */
11 
12 #ifndef R600_PIPE_COMMON_H
13 #define R600_PIPE_COMMON_H
14 
15 #include <stdio.h>
16 
17 #include "winsys/radeon_winsys.h"
18 
19 #include "util/disk_cache.h"
20 #include "util/u_blitter.h"
21 #include "util/list.h"
22 #include "util/u_range.h"
23 #include "util/slab.h"
24 #include "util/u_suballoc.h"
25 #include "util/u_transfer.h"
26 #include "util/u_threaded_context.h"
27 
28 #include "compiler/nir/nir.h"
29 
30 struct u_log_context;
31 #define ATI_VENDOR_ID 0x1002
32 
/* Driver-private pipe_resource flags, allocated on top of
 * PIPE_RESOURCE_FLAG_DRV_PRIV. */
#define R600_RESOURCE_FLAG_TRANSFER		(PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
#define R600_RESOURCE_FLAG_FLUSHED_DEPTH	(PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
#define R600_RESOURCE_FLAG_FORCE_TILING		(PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
/* Note: bit 3 is intentionally left unused here. */
#define R600_RESOURCE_FLAG_UNMAPPABLE		(PIPE_RESOURCE_FLAG_DRV_PRIV << 4)

/* Context flush flags (see r600_common_context::flags). */
#define R600_CONTEXT_STREAMOUT_FLUSH		(1u << 0)
/* Pipeline & streamout query controls. */
#define R600_CONTEXT_START_PIPELINE_STATS	(1u << 1)
#define R600_CONTEXT_STOP_PIPELINE_STATS	(1u << 2)
#define R600_CONTEXT_FLUSH_FOR_RENDER_COND	(1u << 3)
#define R600_CONTEXT_PRIVATE_FLAG		(1u << 4)
49 
/* Debug flags, stored in r600_common_screen::debug_flags (64-bit mask). */
#define DBG_VS			(1 << PIPE_SHADER_VERTEX)
#define DBG_PS			(1 << PIPE_SHADER_FRAGMENT)
#define DBG_GS			(1 << PIPE_SHADER_GEOMETRY)
#define DBG_TCS			(1 << PIPE_SHADER_TESS_CTRL)
#define DBG_TES			(1 << PIPE_SHADER_TESS_EVAL)
#define DBG_CS			(1 << PIPE_SHADER_COMPUTE)
/* Mask of all shader-dump bits below DBG_FS. This is valid even though
 * DBG_FS is #defined after it: macros expand at use, not at definition. */
#define DBG_ALL_SHADERS		(DBG_FS - 1)
#define DBG_FS			(1 << 6) /* fetch shader */
#define DBG_TEX			(1 << 7)
#define DBG_NIR			(1 << 8)
#define DBG_COMPUTE		(1 << 9)
/* gap */
#define DBG_VM			(1 << 11)
#define DBG_PREOPT_IR		(1 << 15)
#define DBG_CHECK_IR		(1 << 16)
/* gaps */
#define DBG_TEST_DMA		(1 << 20)
/* Bits 21-31 are reserved for the r600g driver. */
/* features */
#define DBG_NO_ASYNC_DMA	(1ull << 32)
#define DBG_NO_HYPERZ		(1ull << 33)
#define DBG_NO_DISCARD_RANGE	(1ull << 34)
#define DBG_NO_2D_TILING	(1ull << 35)
#define DBG_NO_TILING		(1ull << 36)
#define DBG_SWITCH_ON_EOP	(1ull << 37)
#define DBG_FORCE_DMA		(1ull << 38)
#define DBG_INFO		(1ull << 40)
#define DBG_NO_WC		(1ull << 41)
#define DBG_CHECK_VM		(1ull << 42)
/* gap */
#define DBG_TEST_VMFAULT_CP	(1ull << 51)
#define DBG_TEST_VMFAULT_SDMA	(1ull << 52)
#define DBG_TEST_VMFAULT_SHADER	(1ull << 53)

/* Alignment for buffer transfer maps. */
#define R600_MAP_BUFFER_ALIGNMENT 64
#define R600_MAX_VIEWPORTS        16

#define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024
89 
/* How strongly a buffer clear/copy must be made coherent with other clients. */
enum r600_coherency {
	R600_COHERENCY_NONE, /* no cache flushes needed */
	R600_COHERENCY_SHADER, /* flush so that shaders observe the result */
	R600_COHERENCY_CB_META, /* flush CB metadata caches as well */
};
95 
96 struct r600_common_context;
97 struct r600_perfcounters;
98 struct tgsi_shader_info;
99 struct r600_qbo_state;
100 
/* Only 32-bit buffer allocations are supported, gallium doesn't support more
 * at the moment.
 *
 * Base class for all GPU-memory-backed resources (buffers and textures).
 */
struct r600_resource {
	struct threaded_resource	b; /* base class (u_threaded_context) */

	/* Winsys objects. */
	struct pb_buffer_lean		*buf;
	uint64_t			gpu_address;
	/* Memory usage if the buffer placement is optimal. */
	uint64_t			vram_usage;
	uint64_t			gart_usage;

	/* Resource properties. */
	uint64_t			bo_size;
	unsigned			bo_alignment;
	enum radeon_bo_domain		domains;
	enum radeon_bo_flag		flags;
	unsigned			bind_history; /* mask of bind points seen so far — presumably PIPE_BIND_*; confirm at callers */

	/* The buffer range which is initialized (with a write transfer,
	 * streamout, DMA, or as a random access target). The rest of
	 * the buffer is considered invalid and can be mapped unsynchronized.
	 *
	 * This allows unsynchronized mapping of a buffer range which hasn't
	 * been used yet. It's for applications which forget to use
	 * the unsynchronized map flag and expect the driver to figure it out.
	 */
	struct util_range		valid_buffer_range;

	/* Whether the resource has been exported via resource_get_handle. */
	unsigned			external_usage; /* PIPE_HANDLE_USAGE_* */

	/* Whether this resource is referenced by bindless handles. */
	bool				texture_handle_allocated;
	bool				image_handle_allocated;
	bool                            compute_global_bo;

	/*
	 * EG/Cayman only - for RAT operations hw need an immediate buffer
	 * to store results in.
	 */
	struct r600_resource            *immed_buffer;
};
145 
/* Driver-private transfer: threaded_transfer plus an optional staging
 * resource used when the destination cannot be mapped directly. */
struct r600_transfer {
	struct threaded_transfer	b; /* base class */
	struct r600_resource		*staging; /* staging copy, or NULL for direct maps */
};
150 
/* Layout of a texture's FMASK (MSAA color compression) metadata. */
struct r600_fmask_info {
	uint64_t offset; /* byte offset within the resource */
	uint64_t size;
	unsigned alignment;
	unsigned pitch_in_pixels;
	unsigned bank_height;
	unsigned slice_tile_max;
	unsigned tile_mode_index;
	unsigned tile_swizzle;
};
161 
/* Layout of a texture's CMASK (color fast-clear) metadata. */
struct r600_cmask_info {
	uint64_t offset; /* byte offset within the resource */
	uint64_t size;
	unsigned alignment;
	unsigned slice_tile_max;
	uint64_t base_address_reg;
};
169 
/* A texture: r600_resource plus surface layout, depth/stencil clear state
 * and color/depth compression metadata. */
struct r600_texture {
	struct r600_resource		resource; /* base class */

	uint64_t			size;
	unsigned			num_level0_transfers;
	enum pipe_format		db_render_format;
	bool				is_depth;
	bool				db_compatible;
	bool				can_sample_z;
	bool				can_sample_s;
	unsigned			dirty_level_mask; /* each bit says if that mipmap is compressed */
	unsigned			stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */
	struct r600_texture		*flushed_depth_texture;
	struct radeon_surf		surface;

	/* Colorbuffer compression and fast clear. */
	struct r600_fmask_info		fmask;
	struct r600_cmask_info		cmask;
	struct r600_resource		*cmask_buffer;
	unsigned			cb_color_info; /* fast clear enable bit */
	unsigned			color_clear_value[2];
	unsigned			last_msaa_resolve_target_micro_mode;

	/* Depth buffer compression and fast clear. */
	uint64_t			htile_offset; /* 0 = no HTILE allocated */
	bool				depth_cleared; /* if it was cleared at least once */
	float				depth_clear_value;
	bool				stencil_cleared; /* if it was cleared at least once */
	uint8_t				stencil_clear_value;

	bool				non_disp_tiling; /* R600-Cayman only */

	/* Counter that should be non-zero if the texture is bound to a
	 * framebuffer. Implemented in radeonsi only.
	 */
	uint32_t			framebuffers_bound;
};
207 
/* A render-target/depth-stencil view with pre-computed CB/DB register
 * values for the supported hardware generations. */
struct r600_surface {
	struct pipe_surface		base; /* base class */

	/* These can vary with block-compressed textures. */
	unsigned width0;
	unsigned height0;

	bool color_initialized;
	bool depth_initialized;

	/* Misc. color flags. */
	bool alphatest_bypass;
	bool export_16bpc;
	bool color_is_int8;
	bool color_is_int10;

	/* Color registers. */
	unsigned cb_color_info;
	unsigned cb_color_base;
	unsigned cb_color_view;
	unsigned cb_color_size;		/* R600 only */
	unsigned cb_color_dim;		/* EG only */
	unsigned cb_color_pitch;	/* EG and later */
	unsigned cb_color_slice;	/* EG and later */
	unsigned cb_color_attrib;	/* EG and later */
	unsigned cb_color_fmask;	/* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
	unsigned cb_color_fmask_slice;	/* EG and later */
	unsigned cb_color_cmask;	/* CB_COLORn_TILE (r600 only) */
	unsigned cb_color_mask;		/* R600 only */
	struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
	struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */

	/* DB registers. */
	uint64_t db_depth_base;		/* DB_Z_READ/WRITE_BASE (EG and later) or DB_DEPTH_BASE (r600) */
	uint64_t db_stencil_base;	/* EG and later */
	uint64_t db_htile_data_base;
	unsigned db_depth_info;		/* R600 only, then SI and later */
	unsigned db_z_info;		/* EG and later */
	unsigned db_depth_view;
	unsigned db_depth_size;
	unsigned db_depth_slice;	/* EG and later */
	unsigned db_stencil_info;	/* EG and later */
	unsigned db_prefetch_limit;	/* R600 only */
	unsigned db_htile_surface;
	unsigned db_preload_control;	/* EG and later */
};
254 
/* Busy/idle sample counts for one GPU block, accumulated by the GPU load
 * thread (see r600_gpu_load.c declarations below). */
struct r600_mmio_counter {
	unsigned busy;
	unsigned idle;
};
259 
/* All sampled GPU-load counters, accessible either by name or as a flat
 * array of unsigned for generic iteration. */
union r600_mmio_counters {
	struct r600_mmio_counters_named {
		/* For global GPU load including SDMA. */
		struct r600_mmio_counter gpu;

		/* GRBM_STATUS */
		struct r600_mmio_counter spi;
		struct r600_mmio_counter gui;
		struct r600_mmio_counter ta;
		struct r600_mmio_counter gds;
		struct r600_mmio_counter vgt;
		struct r600_mmio_counter ia;
		struct r600_mmio_counter sx;
		struct r600_mmio_counter wd;
		struct r600_mmio_counter bci;
		struct r600_mmio_counter sc;
		struct r600_mmio_counter pa;
		struct r600_mmio_counter db;
		struct r600_mmio_counter cp;
		struct r600_mmio_counter cb;

		/* SRBM_STATUS2 */
		struct r600_mmio_counter sdma;

		/* CP_STAT */
		struct r600_mmio_counter pfp;
		struct r600_mmio_counter meq;
		struct r600_mmio_counter me;
		struct r600_mmio_counter surf_sync;
		struct r600_mmio_counter cp_dma;
		struct r600_mmio_counter scratch_ram;
	} named;
	/* Flat view over the struct above; valid because every member is a
	 * pair of unsigned. */
	unsigned array[sizeof(struct r600_mmio_counters_named) / sizeof(unsigned)];
};
294 
/* An imported memory object (pipe_memory_object) backed by a winsys buffer. */
struct r600_memory_object {
	struct pipe_memory_object	b; /* base class */
	struct pb_buffer_lean		*buf;
	uint32_t			stride;
	uint32_t			offset;
};
301 
/* Screen state shared by r600g and radeonsi: winsys handle, chip info,
 * debug flags, shader disk cache, GPU-load sampling and invalidation
 * counters observed by all contexts. */
struct r600_common_screen {
	struct pipe_screen		b; /* base class */
	struct radeon_winsys		*ws;
	enum radeon_family		family;
	enum amd_gfx_level			gfx_level;
	struct radeon_info		info;
	uint64_t			debug_flags; /* mask of DBG_* above */
	bool				has_cp_dma;
	bool				has_streamout;

	struct disk_cache		*disk_shader_cache;

	struct slab_parent_pool		pool_transfers;

	/* Texture filter settings. */
	int				force_aniso; /* -1 = disabled */

	/* Auxiliary context. Mainly used to initialize resources.
	 * It must be locked prior to using and flushed before unlocking. */
	struct pipe_context		*aux_context;
	mtx_t				aux_context_lock;

	/* This must be in the screen, because UE4 uses one context for
	 * compilation and another one for rendering.
	 */
	unsigned			num_compilations;
	/* Along with ST_DEBUG=precompile, this should show if applications
	 * are loading shaders on demand. This is a monotonic counter.
	 */
	unsigned			num_shaders_created;
	unsigned			num_shader_cache_hits;

	/* GPU load thread. */
	mtx_t				gpu_load_mutex;
	thrd_t				gpu_load_thread;
	bool				gpu_load_thread_created;
	union r600_mmio_counters	mmio_counters;
	volatile unsigned		gpu_load_stop_thread; /* bool */

	char				renderer_string[100];

	/* Performance counters. */
	struct r600_perfcounters	*perfcounters;

	/* If pipe_screen wants to recompute and re-emit the framebuffer,
	 * sampler, and image states of all contexts, it should atomically
	 * increment this.
	 *
	 * Each context will compare this with its own last known value of
	 * the counter before drawing and re-emit the states accordingly.
	 */
	unsigned			dirty_tex_counter;

	/* Atomically increment this counter when an existing texture's
	 * metadata is enabled or disabled in a way that requires changing
	 * contexts' compressed texture binding masks.
	 */
	unsigned			compressed_colortex_counter;

	struct {
		/* Context flags to set so that all writes from earlier jobs
		 * in the CP are seen by L2 clients.
		 */
		unsigned cp_to_L2;

		/* Context flags to set so that all writes from earlier jobs
		 * that end in L2 are seen by CP.
		 */
		unsigned L2_to_cp;

		/* Context flags to set so that all writes from earlier
		 * compute jobs are seen by L2 clients.
		 */
		unsigned compute_to_L2;
	} barrier_flags;

	/* NIR compiler options; the _fs variant is used for fragment shaders. */
	struct nir_shader_compiler_options nir_options;
	struct nir_shader_compiler_options nir_options_fs;
};
381 
/* This encapsulates a state or an operation which can be emitted into the GPU
 * command stream. */
struct r600_atom {
	void (*emit)(struct r600_common_context *ctx, struct r600_atom *state);
	unsigned		num_dw; /* command-stream size of the emitted state, in dwords */
	unsigned short		id;
};
389 
/* A stream-output (transform feedback) target buffer binding. */
struct r600_so_target {
	struct pipe_stream_output_target b; /* base class */

	/* The buffer where BUFFER_FILLED_SIZE is stored. */
	struct r600_resource	*buf_filled_size;
	unsigned		buf_filled_size_offset;
	bool			buf_filled_size_valid;

	unsigned		stride_in_dw;
};
400 
/* Per-context streamout (transform feedback) state and its emit atoms. */
struct r600_streamout {
	struct r600_atom		begin_atom;
	bool				begin_emitted;
	unsigned			num_dw_for_end;

	unsigned			enabled_mask;
	unsigned			num_targets;
	struct r600_so_target		*targets[PIPE_MAX_SO_BUFFERS];

	unsigned			append_bitmask;
	bool				suspended;

	/* External state which comes from the vertex shader,
	 * it must be set explicitly when binding a shader. */
	uint16_t			*stride_in_dw;
	unsigned			enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */

	/* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */
	unsigned			hw_enabled_mask;

	/* The state of VGT_STRMOUT_(CONFIG|EN). */
	struct r600_atom		enable_atom;
	bool				streamout_enabled;
	bool				prims_gen_query_enabled;
	int				num_prims_gen_queries;
};
427 
/* A scissor rectangle with signed coordinates; derived from viewports
 * (see r600_viewports::as_scissor), which can yield negative bounds. */
struct r600_signed_scissor {
	int minx;
	int miny;
	int maxx;
	int maxy;
};
434 
/* All scissor states plus the atom that re-emits the dirty ones. */
struct r600_scissors {
	struct r600_atom		atom;
	unsigned			dirty_mask; /* one bit per viewport index */
	struct pipe_scissor_state	states[R600_MAX_VIEWPORTS];
};
440 
/* All viewport states, their scissor equivalents, and the emit atom. */
struct r600_viewports {
	struct r600_atom		atom;
	unsigned			dirty_mask; /* one bit per viewport index */
	unsigned			depth_range_dirty_mask;
	struct pipe_viewport_state	states[R600_MAX_VIEWPORTS];
	struct r600_signed_scissor	as_scissor[R600_MAX_VIEWPORTS];
};
448 
/* One command submission ring (gfx or dma) and its flush callback. */
struct r600_ring {
	struct radeon_cmdbuf		cs;
	void (*flush)(void *ctx, unsigned flags,
		      struct pipe_fence_handle **fence);
};
454 
/* Saved CS data for debugging features. */
struct radeon_saved_cs {
	uint32_t			*ib; /* copy of the indirect buffer contents */
	unsigned			num_dw;

	struct radeon_bo_list_item	*bo_list; /* buffers referenced by the CS */
	unsigned			bo_count;
};
463 
/* Context state shared by r600g and radeonsi: rings, fences, streamout,
 * viewport/scissor state, query bookkeeping, statistics, and a table of
 * driver-internal callbacks filled in by the derived driver. */
struct r600_common_context {
	struct pipe_context b; /* base class */

	struct r600_common_screen	*screen;
	struct radeon_winsys		*ws;
	struct radeon_winsys_ctx	*ctx;
	enum radeon_family		family;
	enum amd_gfx_level			gfx_level;
	struct r600_ring		gfx;
	struct r600_ring		dma;
	struct pipe_fence_handle	*last_gfx_fence;
	struct pipe_fence_handle	*last_sdma_fence;
	struct r600_resource		*eop_bug_scratch;
	unsigned			num_gfx_cs_flushes;
	unsigned			initial_gfx_cs_size;
	/* Last observed values of the screen-wide invalidation counters. */
	unsigned			last_dirty_tex_counter;
	unsigned			last_compressed_colortex_counter;
	unsigned			last_num_draw_calls;

	struct threaded_context		*tc;
	struct u_suballocator		allocator_zeroed_memory;
	struct slab_child_pool		pool_transfers;
	struct slab_child_pool		pool_transfers_unsync; /* for threaded_context */

	/* Current unaccounted memory usage. */
	uint64_t			vram;
	uint64_t			gtt;

	/* States. */
	struct r600_streamout		streamout;
	struct r600_scissors		scissors;
	struct r600_viewports		viewports;
	bool				scissor_enabled;
	bool				clip_halfz;
	bool				vs_writes_viewport_index;
	bool				vs_disables_clipping_viewport;

	/* Additional context states. */
	unsigned flags; /* flush flags (R600_CONTEXT_*) */

	/* Queries. */
	/* Maintain the list of active queries for pausing between IBs. */
	int				num_occlusion_queries;
	int				num_perfect_occlusion_queries;
	struct list_head		active_queries;
	unsigned			num_cs_dw_queries_suspend;
	/* Misc stats. */
	unsigned			num_draw_calls;
	unsigned			num_decompress_calls;
	unsigned			num_mrt_draw_calls;
	unsigned			num_prim_restart_calls;
	unsigned			num_spill_draw_calls;
	unsigned			num_compute_calls;
	unsigned			num_spill_compute_calls;
	unsigned			num_dma_calls;
	unsigned			num_cp_dma_calls;
	unsigned			num_vs_flushes;
	unsigned			num_ps_flushes;
	unsigned			num_cs_flushes;
	unsigned			num_cb_cache_flushes;
	unsigned			num_db_cache_flushes;
	unsigned			num_resident_handles;
	uint64_t			num_alloc_tex_transfer_bytes;

	/* Render condition. */
	struct r600_atom		render_cond_atom;
	struct pipe_query		*render_cond;
	unsigned			render_cond_mode;
	bool				render_cond_invert;
	bool				render_cond_force_off; /* for u_blitter */

	/* MSAA sample locations.
	 * The first index is the sample index.
	 * The second index is the coordinate: X, Y. */
	float				sample_locations_1x[1][2];
	float				sample_locations_2x[2][2];
	float				sample_locations_4x[4][2];
	float				sample_locations_8x[8][2];
	float				sample_locations_16x[16][2];

	struct util_debug_callback	debug;
	struct pipe_device_reset_callback device_reset_callback;
	struct u_log_context		*log;

	void				*query_result_shader;

	/* Copy one resource to another using async DMA. */
	void (*dma_copy)(struct pipe_context *ctx,
			 struct pipe_resource *dst,
			 unsigned dst_level,
			 unsigned dst_x, unsigned dst_y, unsigned dst_z,
			 struct pipe_resource *src,
			 unsigned src_level,
			 const struct pipe_box *src_box);

	/* Fill a buffer range with a value using the DMA ring. */
	void (*dma_clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
				 uint64_t offset, uint64_t size, unsigned value);

	/* Fill a buffer range with a value, honoring the coherency request. */
	void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
			     uint64_t offset, uint64_t size, unsigned value,
			     enum r600_coherency coher);

	/* Decompress a depth texture (optionally into a staging texture). */
	void (*blit_decompress_depth)(struct pipe_context *ctx,
				      struct r600_texture *texture,
				      struct r600_texture *staging,
				      unsigned first_level, unsigned last_level,
				      unsigned first_layer, unsigned last_layer,
				      unsigned first_sample, unsigned last_sample);

	/* Reallocate the buffer and update all resource bindings where
	 * the buffer is bound, including all resource descriptors. */
	void (*invalidate_buffer)(struct pipe_context *ctx, struct pipe_resource *buf);

	/* Update all resource bindings where the buffer is bound, including
	 * all resource descriptors. This is invalidate_buffer without
	 * the invalidation. */
	void (*rebind_buffer)(struct pipe_context *ctx, struct pipe_resource *buf,
			      uint64_t old_gpu_address);

	/* Save query-buffer-object state before a blitter operation. */
	void (*save_qbo_state)(struct pipe_context *ctx, struct r600_qbo_state *st);

	/* This ensures there is enough space in the command stream. */
	void (*need_gfx_cs_space)(struct pipe_context *ctx, unsigned num_dw,
				  bool include_draw_vbo);

	/* Mark an atom dirty (or clean) so it gets re-emitted. */
	void (*set_atom_dirty)(struct r600_common_context *ctx,
			       struct r600_atom *atom, bool dirty);

	/* Diagnose VM faults using a previously saved CS (DBG_CHECK_VM). */
	void (*check_vm_faults)(struct r600_common_context *ctx,
				struct radeon_saved_cs *saved,
				enum amd_ip_type ring);
};
596 
/* r600_buffer_common.c — buffer allocation, mapping and transfer helpers. */
bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx,
				     struct pb_buffer_lean *buf,
				     unsigned usage);
void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx,
                                      struct r600_resource *resource,
                                      unsigned usage);
void r600_buffer_subdata(struct pipe_context *ctx,
			 struct pipe_resource *buffer,
			 unsigned usage, unsigned offset,
			 unsigned size, const void *data);
void r600_init_resource_fields(struct r600_common_screen *rscreen,
			       struct r600_resource *res,
			       uint64_t size, unsigned alignment);
bool r600_alloc_resource(struct r600_common_screen *rscreen,
			 struct r600_resource *res);
void r600_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *buf);
void r600_buffer_flush_region(struct pipe_context *ctx,
			      struct pipe_transfer *transfer,
			      const struct pipe_box *rel_box);
struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
					 const struct pipe_resource *templ,
					 unsigned alignment);
struct pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen,
						  unsigned flags,
						  unsigned usage,
						  unsigned size,
						  unsigned alignment);
struct pipe_resource *
r600_buffer_from_user_memory(struct pipe_screen *screen,
			     const struct pipe_resource *templ,
			     void *user_memory);
void
r600_invalidate_resource(struct pipe_context *ctx,
			 struct pipe_resource *resource);
void r600_replace_buffer_storage(struct pipe_context *ctx,
				 struct pipe_resource *dst,
				 struct pipe_resource *src);
void *r600_buffer_transfer_map(struct pipe_context *ctx,
                               struct pipe_resource *resource,
                               unsigned level,
                               unsigned usage,
                               const struct pipe_box *box,
                               struct pipe_transfer **ptransfer);
void r600_buffer_transfer_unmap(struct pipe_context *ctx,
				struct pipe_transfer *transfer);

/* r600_common_pipe.c — screen/context init, fences, CS debugging helpers. */
void r600_gfx_write_event_eop(struct r600_common_context *ctx,
			      unsigned event, unsigned event_flags,
			      unsigned data_sel,
			      struct r600_resource *buf, uint64_t va,
			      uint32_t new_fence, unsigned query_type);
unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen);
void r600_gfx_wait_fence(struct r600_common_context *ctx,
			 struct r600_resource *buf,
			 uint64_t va, uint32_t ref, uint32_t mask);
void r600_draw_rectangle(struct blitter_context *blitter,
			 void *vertex_elements_cso,
			 blitter_get_vs_func get_vs,
			 int x1, int y1, int x2, int y2,
			 float depth, unsigned num_instances,
			 enum blitter_attrib_type type,
			 const union blitter_attrib *attrib);
bool r600_common_screen_init(struct r600_common_screen *rscreen,
			     struct radeon_winsys *ws);
void r600_destroy_common_screen(struct r600_common_screen *rscreen);
void r600_preflush_suspend_features(struct r600_common_context *ctx);
void r600_postflush_resume_features(struct r600_common_context *ctx);
bool r600_common_context_init(struct r600_common_context *rctx,
			      struct r600_common_screen *rscreen,
			      unsigned context_flags);
void r600_common_context_cleanup(struct r600_common_context *rctx);
bool r600_can_dump_shader(struct r600_common_screen *rscreen,
			  unsigned processor);
bool r600_extra_shader_checks(struct r600_common_screen *rscreen,
			      unsigned processor);
void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
			      uint64_t offset, uint64_t size, unsigned value);
struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
						  const struct pipe_resource *templ);
const char *r600_get_llvm_processor_name(enum radeon_family family);
void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
			 struct r600_resource *dst, struct r600_resource *src);
void radeon_save_cs(struct radeon_winsys *ws, struct radeon_cmdbuf *cs,
		    struct radeon_saved_cs *saved, bool get_buffer_list);
void radeon_clear_saved_cs(struct radeon_saved_cs *saved);
bool r600_check_device_reset(struct r600_common_context *rctx);

/* r600_gpu_load.c — background GPU-load sampling. */
void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen);
uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type);
unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type,
			  uint64_t begin);

/* r600_perfcounters.c */
void r600_perfcounters_destroy(struct r600_common_screen *rscreen);

/* r600_query.c — query object implementation. */
void r600_init_screen_query_functions(struct r600_common_screen *rscreen);
void r600_query_init(struct r600_common_context *rctx);
void r600_suspend_queries(struct r600_common_context *ctx);
void r600_resume_queries(struct r600_common_context *ctx);
void r600_query_fix_enabled_rb_mask(struct r600_common_screen *rscreen);

/* r600_streamout.c — transform feedback. */
void r600_streamout_buffers_dirty(struct r600_common_context *rctx);
void r600_set_streamout_targets(struct pipe_context *ctx,
				unsigned num_targets,
				struct pipe_stream_output_target **targets,
				const unsigned *offset);
void r600_emit_streamout_end(struct r600_common_context *rctx);
void r600_update_prims_generated_query_state(struct r600_common_context *rctx,
					     unsigned type, int diff);
void r600_streamout_init(struct r600_common_context *rctx);

/* r600_test_dma.c — self-test triggered by DBG_TEST_DMA. */
void r600_test_dma(struct r600_common_screen *rscreen);

/* r600_texture.c — texture creation, metadata and transfers. */
bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
				struct r600_texture *rdst,
				unsigned dst_level, unsigned dstx,
				unsigned dsty, unsigned dstz,
				struct r600_texture *rsrc,
				unsigned src_level,
				const struct pipe_box *src_box);
void r600_texture_destroy(struct pipe_screen *screen, struct pipe_resource *ptex);
void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
				 struct r600_texture *rtex,
				 unsigned nr_samples,
				 struct r600_fmask_info *out);
void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
				 struct r600_texture *rtex,
				 struct r600_cmask_info *out);
bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
				     struct pipe_resource *texture,
				     struct r600_texture **staging);
void r600_print_texture_info(struct r600_common_screen *rscreen,
			     struct r600_texture *rtex, struct u_log_context *log);
struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
					const struct pipe_resource *templ);
struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
						struct pipe_resource *texture,
						const struct pipe_surface *templ,
						unsigned width0, unsigned height0,
						unsigned width, unsigned height);
unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap);
void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
				   struct pipe_framebuffer_state *fb,
				   struct r600_atom *fb_state,
				   unsigned *buffers, uint8_t *dirty_cbufs,
				   const union pipe_color_union *color);
void r600_init_screen_texture_functions(struct r600_common_screen *rscreen);
void r600_init_context_texture_functions(struct r600_common_context *rctx);
void eg_resource_alloc_immed(struct r600_common_screen *rscreen,
			     struct r600_resource *res,
			     unsigned immed_size);
void *r600_texture_transfer_map(struct pipe_context *ctx,
			       struct pipe_resource *texture,
			       unsigned level,
			       unsigned usage,
			       const struct pipe_box *box,
			       struct pipe_transfer **ptransfer);
void r600_texture_transfer_unmap(struct pipe_context *ctx,
				struct pipe_transfer* transfer);

/* r600_viewport.c — viewport/scissor state management. */
void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx,
					    struct pipe_scissor_state *scissor);
void r600_viewport_set_rast_deps(struct r600_common_context *rctx,
				 bool scissor_enable, bool clip_halfz);
void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
					  struct tgsi_shader_info *info);
void r600_init_viewport_functions(struct r600_common_context *rctx);

/* cayman_msaa.c — MSAA sample positions for EG/Cayman. */
extern const uint32_t eg_sample_locs_2x[4];
extern const unsigned eg_max_dist_2x;
extern const uint32_t eg_sample_locs_4x[4];
extern const unsigned eg_max_dist_4x;
void cayman_get_sample_position(struct pipe_context *ctx, unsigned sample_count,
				unsigned sample_index, float *out_value);
void cayman_init_msaa(struct pipe_context *ctx);
void cayman_emit_msaa_state(struct radeon_cmdbuf *cs, int nr_samples,
			    int ps_iter_samples, int overrast_samples);
783 
784 
785 /* Inline helpers. */
786 
/* Downcast a pipe_resource to the driver's r600_resource; valid because
 * every r600 buffer/texture embeds the base struct as its first member. */
static inline struct r600_resource *r600_resource(struct pipe_resource *r)
{
	return (struct r600_resource*)r;
}
791 
/* Reference-counted assignment: make *ptr point at res, adjusting both
 * reference counts (either side may be NULL). */
static inline void
r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res)
{
	pipe_resource_reference((struct pipe_resource **)ptr,
				(struct pipe_resource *)res);
}
798 
/* Reference-counted assignment for textures; forwards to the embedded
 * base resource's reference counting. */
static inline void
r600_texture_reference(struct r600_texture **ptr, struct r600_texture *res)
{
	pipe_resource_reference((struct pipe_resource **)ptr, &res->resource.b.b);
}
804 
805 static inline void
r600_context_add_resource_size(struct pipe_context * ctx,struct pipe_resource * r)806 r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resource *r)
807 {
808 	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
809 	struct r600_resource *res = (struct r600_resource *)r;
810 
811 	if (res) {
812 		/* Add memory usage for need_gfx_cs_space */
813 		rctx->vram += res->vram_usage;
814 		rctx->gtt += res->gart_usage;
815 	}
816 }
817 
r600_get_strmout_en(struct r600_common_context * rctx)818 static inline bool r600_get_strmout_en(struct r600_common_context *rctx)
819 {
820 	return rctx->streamout.streamout_enabled ||
821 	       rctx->streamout.prims_gen_query_enabled;
822 }
823 
/* Hardware values for the XY (min/mag) filter field of SQ texture
 * sampler words. */
#define     SQ_TEX_XY_FILTER_POINT                         0x00
#define     SQ_TEX_XY_FILTER_BILINEAR                      0x01
#define     SQ_TEX_XY_FILTER_ANISO_POINT                   0x02
#define     SQ_TEX_XY_FILTER_ANISO_BILINEAR                0x03
828 
eg_tex_filter(unsigned filter,unsigned max_aniso)829 static inline unsigned eg_tex_filter(unsigned filter, unsigned max_aniso)
830 {
831 	if (filter == PIPE_TEX_FILTER_LINEAR)
832 		return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_BILINEAR
833 				     : SQ_TEX_XY_FILTER_BILINEAR;
834 	else
835 		return max_aniso > 1 ? SQ_TEX_XY_FILTER_ANISO_POINT
836 				     : SQ_TEX_XY_FILTER_POINT;
837 }
838 
/* Map a max-anisotropy value to the hardware MAX_ANISO_RATIO field:
 * log2 of the ratio, clamped to 4 (i.e. 2x->1, 4x->2, 8x->3, >=16x->4). */
static inline unsigned r600_tex_aniso_filter(unsigned filter)
{
	unsigned ratio = 0;

	while (ratio < 4 && filter >= (2u << ratio))
		ratio++;
	return ratio;
}
851 
/* Number of SIMD lanes in one hardware wavefront for the given family:
 * the smallest parts use 16, some low/mid-range parts 32, all others 64. */
static inline unsigned r600_wavefront_size(enum radeon_family family)
{
	switch (family) {
	case CHIP_RV610:
	case CHIP_RS780:
	case CHIP_RV620:
	case CHIP_RS880:
		return 16;
	case CHIP_RV630:
	case CHIP_RV635:
	case CHIP_RV730:
	case CHIP_RV710:
	case CHIP_PALM:
	case CHIP_CEDAR:
		return 32;
	default:
		return 64;
	}
}
871 
872 static inline unsigned
r600_get_sampler_view_priority(struct r600_resource * res)873 r600_get_sampler_view_priority(struct r600_resource *res)
874 {
875 	if (res->b.b.target == PIPE_BUFFER)
876 		return RADEON_PRIO_SAMPLER_BUFFER;
877 
878 	if (res->b.b.nr_samples > 1)
879 		return RADEON_PRIO_SAMPLER_TEXTURE_MSAA;
880 
881 	return RADEON_PRIO_SAMPLER_TEXTURE;
882 }
883 
884 static inline bool
r600_can_sample_zs(struct r600_texture * tex,bool stencil_sampler)885 r600_can_sample_zs(struct r600_texture *tex, bool stencil_sampler)
886 {
887 	return (stencil_sampler && tex->can_sample_s) ||
888 	       (!stencil_sampler && tex->can_sample_z);
889 }
890 
891 static inline bool
r600_htile_enabled(struct r600_texture * tex,unsigned level)892 r600_htile_enabled(struct r600_texture *tex, unsigned level)
893 {
894 	return tex->htile_offset && level == 0;
895 }
896 
/* Print a compute debug message to stderr when DBG_COMPUTE is set.
 * The do/while(0) wrapper must NOT end with a semicolon: the caller
 * supplies it, so the macro behaves like a single statement and is safe
 * inside if/else bodies. (The previous trailing ';' broke that.) */
#define COMPUTE_DBG(rscreen, fmt, args...) \
	do { \
		if (((rscreen)->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, fmt, ##args); \
	} while (0)
901 
/* Print an error to stderr prefixed with file, line and function. */
#define R600_ERR(fmt, args...) \
	fprintf(stderr, "EE %s:%d %s - " fmt, __FILE__, __LINE__, __func__, ##args)
904 
/* For MSAA sample positions.
 * Packs four (x, y) sample offsets into one 32-bit register value; each
 * coordinate is masked to a 4-bit field, with sample i occupying bits
 * [8i+3 : 8i] (x) and [8i+7 : 8i+4] (y). */
#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y)  \
	(((s0x) & 0xf) | (((unsigned)(s0y) & 0xf) << 4) |		   \
	(((unsigned)(s1x) & 0xf) << 8) | (((unsigned)(s1y) & 0xf) << 12) |	   \
	(((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) |	   \
	 (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28))
911 
/* Convert a float to signed fixed point with frac_bits fractional bits
 * (truncates toward zero, as float->int conversion does). */
static inline int S_FIXED(float value, unsigned frac_bits)
{
	const int one = 1 << frac_bits;

	return (int)(value * (float)one);
}
916 
917 #endif
918