/*
 * Copyright 2010 Jerome Glisse <[email protected]>
 * Authors:
 *      Jerome Glisse
 * SPDX-License-Identifier: MIT
 */

#include "r600_pipe.h"
#include "evergreend.h"
#include "util/u_memory.h"
#include "util/u_math.h"

void evergreen_dma_copy_buffer(struct r600_context *rctx,
			       struct pipe_resource *dst,
			       struct pipe_resource *src,
			       uint64_t dst_offset,
			       uint64_t src_offset,
			       uint64_t size)
{
	struct radeon_cmdbuf *cs = &rctx->b.dma.cs;
	unsigned i, ncopy, csize, sub_cmd, shift;
	struct r600_resource *rdst = (struct r600_resource*)dst;
	struct r600_resource *rsrc = (struct r600_resource*)src;

	/* Mark the buffer range of the destination as valid (initialized),
	 * so that transfer_map knows it should wait for the GPU when mapping
	 * that range. */
	util_range_add(&rdst->b.b, &rdst->valid_buffer_range, dst_offset,
		       dst_offset + size);

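	/* Convert the buffer-relative offsets into GPU virtual addresses. */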
	dst_offset += rdst->gpu_address;
	src_offset += rsrc->gpu_address;

	/* see if we use dword or byte copy */
	if (!(dst_offset % 4) && !(src_offset % 4) && !(size % 4)) {
		size >>= 2;
		sub_cmd = EG_DMA_COPY_DWORD_ALIGNED;
		shift = 2;
	} else {
		sub_cmd = EG_DMA_COPY_BYTE_ALIGNED;
		shift = 0;
	}
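	/* Split the transfer into packets of at most EG_DMA_COPY_MAX_SIZE units
	 * each (dwords on the dword-aligned path, bytes otherwise). */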
	ncopy = (size / EG_DMA_COPY_MAX_SIZE) + !!(size % EG_DMA_COPY_MAX_SIZE);

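	/* Each copy packet emitted below is 5 dwords: header plus the low and
	 * high halves of the destination and source addresses. */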
	r600_need_dma_space(&rctx->b, ncopy * 5, rdst, rsrc);
	for (i = 0; i < ncopy; i++) {
		csize = size < EG_DMA_COPY_MAX_SIZE ? size : EG_DMA_COPY_MAX_SIZE;
		/* emit reloc before writing cs so that cs is always in consistent state */
		radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, rsrc, RADEON_USAGE_READ);
		radeon_add_to_buffer_list(&rctx->b, &rctx->b.dma, rdst, RADEON_USAGE_WRITE);
		radeon_emit(cs, DMA_PACKET(DMA_PACKET_COPY, sub_cmd, csize));
		radeon_emit(cs, dst_offset & 0xffffffff);	/* DST_ADDR_LO [31:0] */
		radeon_emit(cs, src_offset & 0xffffffff);	/* SRC_ADDR_LO [31:0] */
		radeon_emit(cs, (dst_offset >> 32UL) & 0xff);	/* DST_ADDR_HI [7:0] */
		radeon_emit(cs, (src_offset >> 32UL) & 0xff);	/* SRC_ADDR_HI [7:0] */
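		/* csize is in dwords on the dword-aligned path, so the shift
		 * converts it back to a byte increment. */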
		dst_offset += csize << shift;
		src_offset += csize << shift;
		size -= csize;
	}
}

/* The max number of bytes to copy per packet. */
#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)

void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
				   struct pipe_resource *dst, uint64_t offset,
				   unsigned size, uint32_t clear_value,
				   enum r600_coherency coher)
{
	struct radeon_cmdbuf *cs = &rctx->b.gfx.cs;

	assert(size);
	assert(rctx->screen->b.has_cp_dma);

	/* Mark the buffer range of the destination as valid (initialized),
	 * so that transfer_map knows it should wait for the GPU when mapping
	 * that range. */
	util_range_add(dst, &r600_resource(dst)->valid_buffer_range, offset,
		       offset + size);

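	/* Convert the buffer-relative offset into a GPU virtual address. */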
	offset += r600_resource(dst)->gpu_address;

	/* Flush the cache where the resource is bound. */
	rctx->b.flags |= r600_get_flush_flags(coher) |
			 R600_CONTEXT_WAIT_3D_IDLE;

	while (size) {
		unsigned sync = 0;
		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
		unsigned reloc;

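		/* Reserve space for the CP_DMA packet, the NOP relocation packet,
		 * any pending flush packets, and a possible PFP_SYNC_ME. */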
		r600_need_cs_space(rctx,
				   10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0) +
				   R600_MAX_PFP_SYNC_ME_DWORDS, false, 0);

		/* Flush the caches for the first copy only. */
		if (rctx->b.flags) {
			r600_flush_emit(rctx);
		}

		/* Do the synchronization after the last copy, so that all data is written to memory. */
		if (size == byte_count) {
			sync = PKT3_CP_DMA_CP_SYNC;
		}

		/* This must be done after r600_need_cs_space. */
		reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
					      (struct r600_resource*)dst, RADEON_USAGE_WRITE |
					      RADEON_PRIO_CP_DMA);

		radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
		radeon_emit(cs, clear_value);	/* DATA [31:0] */
		radeon_emit(cs, sync | PKT3_CP_DMA_SRC_SEL(2));	/* CP_SYNC [31] | SRC_SEL[30:29] */
		radeon_emit(cs, offset);	/* DST_ADDR_LO [31:0] */
		radeon_emit(cs, (offset >> 32) & 0xff);		/* DST_ADDR_HI [7:0] */
		radeon_emit(cs, byte_count);	/* COMMAND [29:22] | BYTE_COUNT [20:0] */

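		/* The NOP packet carries the relocation index of the destination buffer. */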
		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
		radeon_emit(cs, reloc);

		size -= byte_count;
		offset += byte_count;
	}

	/* CP DMA is executed in ME, but index buffers are read by PFP.
	 * This ensures that ME (CP DMA) is idle before PFP starts fetching
	 * indices. If we wanted to execute CP DMA in PFP, this packet
	 * should precede it.
	 */
	if (coher == R600_COHERENCY_SHADER)
		r600_emit_pfp_sync_me(rctx);
}
133