/*
 * Copyright © 2012-2018 Rob Clark <[email protected]>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#ifndef FREEDRENO_RINGBUFFER_H_
#define FREEDRENO_RINGBUFFER_H_

#include <stdio.h>
#include "util/u_atomic.h"
#include "util/u_debug.h"

#include "adreno_common.xml.h"
#include "adreno_pm4.xml.h"
#include "freedreno_drmif.h"
#include "freedreno_pm4.h"

#ifdef __cplusplus
extern "C" {
#endif

struct fd_submit;
struct fd_ringbuffer;

enum fd_ringbuffer_flags {

   /* Primary ringbuffer for a submit, ie. an IB1 level rb for
    * which the kernel must set up RB->IB1 CP_INDIRECT_BRANCH
    * packets.
    */
   FD_RINGBUFFER_PRIMARY = 0x1,

   /* Hint that the stateobj will be used for streaming state
    * that is used once or a few times and then discarded.
    *
    * For sub-allocation, non-streaming stateobjs should be
    * sub-allocated from a page-sized buffer, so one long-lived
    * stateobj doesn't prevent other pages from being freed.
    * (Ie. it would be no worse than allocating a page-sized
    * bo for each small non-streaming stateobj.)
    *
    * But streaming stateobjs could be sub-allocated from a
    * larger buffer to reduce the alloc/del overhead.
    */
   FD_RINGBUFFER_STREAMING = 0x2,

   /* Indicates that a "growable" cmdstream can be used, consisting
    * of multiple physical cmdstream buffers.
    */
   FD_RINGBUFFER_GROWABLE = 0x4,

   /* Internal use only: */
   _FD_RINGBUFFER_OBJECT = 0x8,
};

/* A submit object manages/tracks all the state buildup for a "submit"
 * ioctl to the kernel.  Additionally, with the exception of long-lived
 * non-STREAMING stateobj rb's, rb's are allocated from the submit.
 */
struct fd_submit *fd_submit_new(struct fd_pipe *pipe);

/* NOTE: all ringbuffers created from the submit should be unref'd
 * before destroying the submit.
 */
void fd_submit_del(struct fd_submit *submit);

struct fd_submit *fd_submit_ref(struct fd_submit *submit);

/* Allocate a new rb from the submit. */
struct fd_ringbuffer *fd_submit_new_ringbuffer(struct fd_submit *submit,
                                               uint32_t size,
                                               enum fd_ringbuffer_flags flags);

/* in_fence_fd: -1 for no in-fence, otherwise a fence fd.  If use_fence_fd
 * is true, the returned fence will be backed by a dma_fence fd.
 */
struct fd_fence *fd_submit_flush(struct fd_submit *submit, int in_fence_fd,
                                 bool use_fence_fd);
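
/* Example (informal sketch): the typical lifecycle of a submit, assuming a
 * previously-created fd_pipe.  "pipe" and the flag choice here are just
 * placeholders for illustration:
 *
 *    struct fd_submit *submit = fd_submit_new(pipe);
 *    struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
 *          submit, 0x1000, FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
 *
 *    ...emit cmdstream into ring...
 *
 *    struct fd_fence *fence = fd_submit_flush(submit, -1, false);
 *    fd_ringbuffer_del(ring);    // unref rb's before deleting the submit
 *    fd_submit_del(submit);
 */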

struct fd_ringbuffer;
struct fd_reloc;

struct fd_ringbuffer_funcs {
   void (*grow)(struct fd_ringbuffer *ring, uint32_t size);

   /**
    * Alternative to emit_reloc for the softpin case, where we only need
    * to track that the bo is used (and not track all the extra info that
    * the kernel would need to do a legacy reloc).
    */
   void (*emit_bo)(struct fd_ringbuffer *ring, struct fd_bo *bo);
   void (*assert_attached)(struct fd_ringbuffer *ring, struct fd_bo *bo);

   void (*emit_reloc)(struct fd_ringbuffer *ring, const struct fd_reloc *reloc);
   uint32_t (*emit_reloc_ring)(struct fd_ringbuffer *ring,
                               struct fd_ringbuffer *target, uint32_t cmd_idx);
   uint32_t (*cmd_count)(struct fd_ringbuffer *ring);
   bool (*check_size)(struct fd_ringbuffer *ring);
   void (*destroy)(struct fd_ringbuffer *ring);
};

/* The ringbuffer object is not opaque so that OUT_RING() type stuff
 * can be inlined.  Note that users should not make assumptions about
 * the size of this struct.
 */
struct fd_ringbuffer {
   uint32_t *cur, *end, *start;
   const struct fd_ringbuffer_funcs *funcs;

   // size or end could probably go away
   int size;
   int32_t refcnt;
   enum fd_ringbuffer_flags flags;
};
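
/* Example (informal): because the struct is not opaque, the remaining space
 * in the ring, in dwords, can be computed directly from the exposed
 * pointers:
 *
 *    uint32_t remaining = ring->end - ring->cur;
 *
 * which is the same check BEGIN_RING() below performs before growing.
 */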

/* Allocate a new long-lived state object, not associated with
 * a submit:
 */
struct fd_ringbuffer *fd_ringbuffer_new_object(struct fd_pipe *pipe,
                                               uint32_t size);

/*
 * Helpers for ref/unref with some extra debugging.. unref() returns true
 * if this was the last reference, ie. the object should be destroyed
 */

static inline void
ref(int32_t *ref)
{
   ASSERTED int32_t count = p_atomic_inc_return(ref);
   /* We should never see a refcnt transition 0->1, this is a sign of a
    * zombie coming back from the dead!
    */
   assert(count != 1);
}

static inline bool
unref(int32_t *ref)
{
   int32_t count = p_atomic_dec_return(ref);
   /* A refcnt going negative is a sign of a double-unref: */
   assert(count != -1);
   return count == 0;
}

static inline void
fd_ringbuffer_del(struct fd_ringbuffer *ring)
{
   if (!unref(&ring->refcnt))
      return;

   ring->funcs->destroy(ring);
}

static inline struct fd_ringbuffer *
fd_ringbuffer_ref(struct fd_ringbuffer *ring)
{
   ref(&ring->refcnt);
   return ring;
}

static inline void
fd_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t ndwords)
{
   assert(ring->funcs->grow); /* unsupported on kgsl */

   ring->funcs->grow(ring, ring->size);
}

static inline bool
fd_ringbuffer_check_size(struct fd_ringbuffer *ring)
{
   return ring->funcs->check_size(ring);
}

static inline void
fd_ringbuffer_emit(struct fd_ringbuffer *ring, uint32_t data)
{
   (*ring->cur++) = data;
}

struct fd_reloc {
   struct fd_bo *bo;
   uint64_t iova;
   uint64_t orval;
#define FD_RELOC_READ  0x0001
#define FD_RELOC_WRITE 0x0002
#define FD_RELOC_DUMP  0x0004
   uint32_t offset;
   int32_t shift;
};

/* We always mark BOs for write, instead of tracking it across reloc
 * sources in userspace.  On the kernel side, this means we track a single
 * excl fence in the BO instead of a set of read fences, which is cheaper.
 * The downside is that a dmabuf-shared device won't be able to read in
 * parallel with a read-only access by freedreno, but most other drivers
 * have likewise decided that this usecase isn't important enough to
 * justify the extra tracking.
 */
#define FD_RELOC_FLAGS_INIT (FD_RELOC_READ | FD_RELOC_WRITE)

/* NOTE: relocs are 2 dwords on a5xx+ */

static inline void
fd_ringbuffer_attach_bo(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
   ring->funcs->emit_bo(ring, bo);
}

static inline void
fd_ringbuffer_assert_attached(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
#ifndef NDEBUG
   ring->funcs->assert_attached(ring, bo);
#endif
}

static inline void
fd_ringbuffer_reloc(struct fd_ringbuffer *ring, const struct fd_reloc *reloc)
{
   ring->funcs->emit_reloc(ring, reloc);
}

static inline uint32_t
fd_ringbuffer_cmd_count(struct fd_ringbuffer *ring)
{
   if (!ring->funcs->cmd_count)
      return 1;
   return ring->funcs->cmd_count(ring);
}

static inline uint32_t
fd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer *ring,
                                   struct fd_ringbuffer *target,
                                   uint32_t cmd_idx)
{
   return ring->funcs->emit_reloc_ring(ring, target, cmd_idx);
}

static inline uint32_t
offset_bytes(void *end, void *start)
{
   return ((char *)end) - ((char *)start);
}

static inline uint32_t
fd_ringbuffer_size(struct fd_ringbuffer *ring)
{
   /* Only really needed for stateobj ringbuffers, and won't really
    * do what you expect for growable rb's.. so let's just restrict
    * this to stateobjs for now:
    */
   assert(!(ring->flags & FD_RINGBUFFER_GROWABLE));
   return offset_bytes(ring->cur, ring->start);
}

static inline bool
fd_ringbuffer_empty(struct fd_ringbuffer *ring)
{
   return (fd_ringbuffer_cmd_count(ring) == 1) &&
          (offset_bytes(ring->cur, ring->start) == 0);
}

#define LOG_DWORDS 0

static inline void
OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
{
   if (LOG_DWORDS) {
      fprintf(stderr, "ring[%p]: OUT_RING   %04x:  %08x\n", ring,
              (uint32_t)(ring->cur - ring->start), data);
   }
   fd_ringbuffer_emit(ring, data);
}

static inline uint64_t
__reloc_iova(struct fd_bo *bo, uint32_t offset, uint64_t orval, int32_t shift)
{
   uint64_t iova = fd_bo_get_iova(bo) + offset;

   if (shift < 0)
      iova >>= -shift;
   else
      iova <<= shift;

   iova |= orval;

   return iova;
}
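
/* Worked example (informal): a negative shift right-shifts the address,
 * which suits registers that store dword-aligned addresses, e.g.:
 *
 *    __reloc_iova(bo, 0, 0, -2) == fd_bo_get_iova(bo) >> 2
 *
 * while orval lets the caller OR additional bits into the final value.
 */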

/*
 * NOTE: OUT_RELOC() is 2 dwords (64b) on a5xx+
 */
static inline void
OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, uint32_t offset,
          uint64_t orval, int32_t shift)
{
   if (LOG_DWORDS) {
      fprintf(stderr, "ring[%p]: OUT_RELOC   %04x:  %p+%u << %d\n", ring,
              (uint32_t)(ring->cur - ring->start), bo, offset, shift);
   }
   assert(offset < fd_bo_size(bo));

   uint64_t iova = __reloc_iova(bo, offset, orval, shift);

#if FD_BO_NO_HARDPIN
   uint64_t *cur = (uint64_t *)ring->cur;
   *cur = iova;
   ring->cur += 2;
   fd_ringbuffer_assert_attached(ring, bo);
#else
   struct fd_reloc reloc = {
      .bo = bo,
      .iova = iova,
      .orval = orval,
      .offset = offset,
      .shift = shift,
   };

   fd_ringbuffer_reloc(ring, &reloc);
#endif
}
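
/* Example (informal sketch): emitting a buffer address as packet payload,
 * assuming "bo" was created earlier with fd_bo_new() and that CP_MEM_WRITE
 * takes a 64-bit address followed by the data dwords (as on a5xx+):
 *
 *    OUT_PKT7(ring, CP_MEM_WRITE, 3);
 *    OUT_RELOC(ring, bo, 0, 0, 0);   // address lo/hi (2 dwords)
 *    OUT_RING(ring, 0xdeadbeef);     // value to write
 */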

static inline void
OUT_RB(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
   fd_ringbuffer_emit_reloc_ring_full(ring, target, 0);
}
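
/* Example (informal sketch): building a long-lived stateobj once with
 * fd_ringbuffer_new_object() and referencing its cmds from another ring:
 *
 *    struct fd_ringbuffer *stateobj =
 *       fd_ringbuffer_new_object(pipe, 0x100);
 *
 *    ...emit state into stateobj...
 *
 *    OUT_RB(ring, stateobj);
 *    fd_ringbuffer_del(stateobj);  // assumes the backend holds its own ref
 */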

static inline void
BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
{
   if (unlikely(ring->cur + ndwords > ring->end))
      fd_ringbuffer_grow(ring, ndwords);
}

static inline void
OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
   BEGIN_RING(ring, cnt + 1);
   OUT_RING(ring, pm4_pkt0_hdr(regindx, cnt));
}

static inline void
OUT_PKT2(struct fd_ringbuffer *ring)
{
   BEGIN_RING(ring, 1);
   OUT_RING(ring, CP_TYPE2_PKT);
}

static inline void
OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
{
   BEGIN_RING(ring, cnt + 1);
   OUT_RING(ring, CP_TYPE3_PKT | ((cnt - 1) << 16) | ((opcode & 0xFF) << 8));
}
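
/* Worked example (informal): OUT_PKT3(ring, CP_DRAW_INDX, 3) reserves
 * 4 dwords (header plus payload) and emits the header
 *
 *    CP_TYPE3_PKT | ((3 - 1) << 16) | ((CP_DRAW_INDX & 0xFF) << 8)
 *
 * ie. the payload size minus one in the count field and the opcode in
 * bits 8..15; the caller then emits the 3 payload dwords with OUT_RING().
 */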

/*
 * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3
 */

static inline void
OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
   BEGIN_RING(ring, cnt + 1);
   OUT_RING(ring, pm4_pkt4_hdr((uint16_t)regindx, (uint16_t)cnt));
}

static inline void
OUT_PKT7(struct fd_ringbuffer *ring, uint32_t opcode, uint32_t cnt)
{
   BEGIN_RING(ring, cnt + 1);
   OUT_RING(ring, pm4_pkt7_hdr((uint8_t)opcode, (uint16_t)cnt));
}
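
/* Example (informal sketch): a type7 packet with two payload dwords;
 * CP_NOP is used here just as a stand-in opcode:
 *
 *    OUT_PKT7(ring, CP_NOP, 2);
 *    OUT_RING(ring, 0);
 *    OUT_RING(ring, 0);
 */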

static inline void
OUT_WFI(struct fd_ringbuffer *ring)
{
   OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
   OUT_RING(ring, 0x00000000);
}

static inline void
OUT_WFI5(struct fd_ringbuffer *ring)
{
   OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
}

#ifdef __cplusplus
} /* end of extern "C" */
#endif

#endif /* FREEDRENO_RINGBUFFER_H_ */