/*
 * Copyright © 2012-2018 Rob Clark <[email protected]>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#ifndef FREEDRENO_RINGBUFFER_H_
#define FREEDRENO_RINGBUFFER_H_

#include <stdio.h>
#include "util/u_atomic.h"
#include "util/u_debug.h"

#include "adreno_common.xml.h"
#include "adreno_pm4.xml.h"
#include "freedreno_drmif.h"
#include "freedreno_pm4.h"

#ifdef __cplusplus
extern "C" {
#endif

struct fd_submit;
struct fd_ringbuffer;

enum fd_ringbuffer_flags {

   /* Primary ringbuffer for a submit, ie. an IB1 level rb for
    * which the kernel must set up RB->IB1 CP_INDIRECT_BRANCH
    * packets.
    */
   FD_RINGBUFFER_PRIMARY = 0x1,

   /* Hint that the stateobj will be used for streaming state
    * that is used once or a few times and then discarded.
    *
    * For sub-allocation, non-streaming stateobjs should be
    * sub-allocated from a page sized buffer, so one long-lived
    * stateobj doesn't prevent other pages from being freed.
    * (Ie. it would be no worse than allocating a page sized
    * bo for each small non-streaming stateobj.)
    *
    * But streaming stateobjs can be sub-allocated from a
    * larger buffer to reduce the alloc/del overhead.
    */
   FD_RINGBUFFER_STREAMING = 0x2,

   /* Indicates that "growable" cmdstream can be used,
    * consisting of multiple physical cmdstream buffers.
    */
   FD_RINGBUFFER_GROWABLE = 0x4,

   /* Internal use only: */
   _FD_RINGBUFFER_OBJECT = 0x8,
};
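
/* A quick sketch of typical flag choices (a hedged example, not part of the
 * API; `submit` is assumed to come from fd_submit_new(), see below):
 *
 *    // one-shot streaming state, sub-allocated from a larger buffer:
 *    fd_submit_new_ringbuffer(submit, 0x100, FD_RINGBUFFER_STREAMING);
 *
 *    // the toplevel IB1 cmdstream, which may need to grow:
 *    fd_submit_new_ringbuffer(submit, 0x1000,
 *                             FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
 */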

/* A submit object manages/tracks all the state buildup for a "submit"
 * ioctl to the kernel.  Additionally, with the exception of long-lived
 * non-STREAMING stateobj rb's, rb's are allocated from the submit.
 */
struct fd_submit *fd_submit_new(struct fd_pipe *pipe);

/* NOTE: all ringbuffers created from the submit should be unref'd
 * before destroying the submit.
 */
void fd_submit_del(struct fd_submit *submit);

struct fd_submit *fd_submit_ref(struct fd_submit *submit);

/* Allocate a new rb from the submit. */
struct fd_ringbuffer *fd_submit_new_ringbuffer(struct fd_submit *submit,
                                               uint32_t size,
                                               enum fd_ringbuffer_flags flags);

/* in_fence_fd: -1 for no in-fence, else a fence fd.  If use_fence_fd is
 * true, the returned out-fence will be backed by a dma_fence fd.
 */
struct fd_fence *fd_submit_flush(struct fd_submit *submit, int in_fence_fd,
                                 bool use_fence_fd);
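
/* Minimal lifecycle sketch (illustrative only; error handling and the
 * actual cmdstream contents are elided, and `pipe` is assumed to be a
 * valid struct fd_pipe pointer):
 *
 *    struct fd_submit *submit = fd_submit_new(pipe);
 *    struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
 *       submit, 0x1000, FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
 *
 *    ... emit cmdstream into ring ...
 *
 *    struct fd_fence *fence = fd_submit_flush(submit, -1, false);
 *    ... use the out-fence, then release everything ...
 *    fd_ringbuffer_del(ring);
 *    fd_submit_del(submit);
 */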

struct fd_ringbuffer;
struct fd_reloc;

struct fd_ringbuffer_funcs {
   void (*grow)(struct fd_ringbuffer *ring, uint32_t size);

   /**
    * Alternative to emit_reloc for the softpin case, where we only need
    * to track that the bo is used (and not track all the extra info that
    * the kernel would need to do a legacy reloc).
    */
   void (*emit_bo)(struct fd_ringbuffer *ring, struct fd_bo *bo);
   void (*assert_attached)(struct fd_ringbuffer *ring, struct fd_bo *bo);

   void (*emit_reloc)(struct fd_ringbuffer *ring, const struct fd_reloc *reloc);
   uint32_t (*emit_reloc_ring)(struct fd_ringbuffer *ring,
                               struct fd_ringbuffer *target, uint32_t cmd_idx);
   uint32_t (*cmd_count)(struct fd_ringbuffer *ring);
   bool (*check_size)(struct fd_ringbuffer *ring);
   void (*destroy)(struct fd_ringbuffer *ring);
};

/* the ringbuffer object is not opaque so that OUT_RING() type stuff
 * can be inlined.  Note that users should not make assumptions about
 * the size of this struct.
 */
struct fd_ringbuffer {
   uint32_t *cur, *end, *start;
   const struct fd_ringbuffer_funcs *funcs;

   // size or end could probably go away
   int size;
   int32_t refcnt;
   enum fd_ringbuffer_flags flags;
};

/* Allocate a new long-lived state object, not associated with
 * a submit:
 */
struct fd_ringbuffer *fd_ringbuffer_new_object(struct fd_pipe *pipe,
                                               uint32_t size);
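
/* Sketch of typical stateobj usage (hypothetical; `pipe` and `ring` are
 * assumed to exist, and the emitted dwords are placeholders):
 *
 *    struct fd_ringbuffer *stateobj = fd_ringbuffer_new_object(pipe, 0x100);
 *    OUT_RING(stateobj, ...);        // build the state once
 *    ...
 *    OUT_RB(ring, stateobj);         // reference it from a submit's cmdstream
 *    ...
 *    fd_ringbuffer_del(stateobj);    // drop our reference when done
 */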

/*
 * Helpers for ref/unref with some extra debugging.. unref() returns true
 * when the last reference is dropped (ie. the object is no longer live).
 */

static inline void
ref(int32_t *ref)
{
   ASSERTED int32_t count = p_atomic_inc_return(ref);
   /* We should never see a refcnt transition 0->1, this is a sign of a
    * zombie coming back from the dead!
    */
   assert(count != 1);
}

static inline bool
unref(int32_t *ref)
{
   int32_t count = p_atomic_dec_return(ref);
   assert(count != -1);
   return count == 0;
}
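
/* Typical usage pattern for the helpers above (a sketch; `struct foo` and
 * foo_destroy() are hypothetical, with refcnt initialized to 1 on creation):
 *
 *    struct foo *
 *    foo_ref(struct foo *obj)
 *    {
 *       ref(&obj->refcnt);
 *       return obj;
 *    }
 *
 *    void
 *    foo_del(struct foo *obj)
 *    {
 *       if (unref(&obj->refcnt))
 *          foo_destroy(obj);
 *    }
 */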

static inline void
fd_ringbuffer_del(struct fd_ringbuffer *ring)
{
   if (--ring->refcnt > 0)
      return;

   ring->funcs->destroy(ring);
}

static inline struct fd_ringbuffer *
fd_ringbuffer_ref(struct fd_ringbuffer *ring)
{
   ring->refcnt++;
   return ring;
}

static inline void
fd_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t ndwords)
{
   assert(ring->funcs->grow); /* unsupported on kgsl */

   ring->funcs->grow(ring, ring->size);
}

static inline bool
fd_ringbuffer_check_size(struct fd_ringbuffer *ring)
{
   return ring->funcs->check_size(ring);
}

static inline void
fd_ringbuffer_emit(struct fd_ringbuffer *ring, uint32_t data)
{
   (*ring->cur++) = data;
}

struct fd_reloc {
   struct fd_bo *bo;
   uint64_t iova;
   uint64_t orval;
#define FD_RELOC_READ  0x0001
#define FD_RELOC_WRITE 0x0002
#define FD_RELOC_DUMP  0x0004
   uint32_t offset;
   int32_t shift;
};

/* We always mark BOs for write, instead of tracking it across reloc
 * sources in userspace.  On the kernel side, this means we track a single
 * excl fence in the BO instead of a set of read fences, which is cheaper.
 * The downside is that a dmabuf-shared device won't be able to read in
 * parallel with a read-only access by freedreno, but most other drivers
 * have likewise decided that that usecase isn't important enough to
 * justify the extra tracking.
 */
#define FD_RELOC_FLAGS_INIT (FD_RELOC_READ | FD_RELOC_WRITE)

/* NOTE: relocs are 2 dwords on a5xx+ */

static inline void
fd_ringbuffer_attach_bo(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
   ring->funcs->emit_bo(ring, bo);
}

static inline void
fd_ringbuffer_assert_attached(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
#ifndef NDEBUG
   ring->funcs->assert_attached(ring, bo);
#endif
}

static inline void
fd_ringbuffer_reloc(struct fd_ringbuffer *ring, const struct fd_reloc *reloc)
{
   ring->funcs->emit_reloc(ring, reloc);
}

static inline uint32_t
fd_ringbuffer_cmd_count(struct fd_ringbuffer *ring)
{
   if (!ring->funcs->cmd_count)
      return 1;
   return ring->funcs->cmd_count(ring);
}

static inline uint32_t
fd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer *ring,
                                   struct fd_ringbuffer *target,
                                   uint32_t cmd_idx)
{
   return ring->funcs->emit_reloc_ring(ring, target, cmd_idx);
}

static inline uint32_t
offset_bytes(void *end, void *start)
{
   return ((char *)end) - ((char *)start);
}

static inline uint32_t
fd_ringbuffer_size(struct fd_ringbuffer *ring)
{
   /* only really needed for stateobj ringbuffers, and won't really
    * do what you expect for growable rb's.. so let's just restrict
    * this to stateobjs for now:
    */
   assert(!(ring->flags & FD_RINGBUFFER_GROWABLE));
   return offset_bytes(ring->cur, ring->start);
}

static inline bool
fd_ringbuffer_empty(struct fd_ringbuffer *ring)
{
   return (fd_ringbuffer_cmd_count(ring) == 1) &&
          (offset_bytes(ring->cur, ring->start) == 0);
}

#define LOG_DWORDS 0

static inline void
OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
{
   if (LOG_DWORDS) {
      fprintf(stderr, "ring[%p]: OUT_RING   %04x:  %08x\n", ring,
              (uint32_t)(ring->cur - ring->start), data);
   }
   fd_ringbuffer_emit(ring, data);
}

static inline uint64_t
__reloc_iova(struct fd_bo *bo, uint32_t offset, uint64_t orval, int32_t shift)
{
   uint64_t iova = fd_bo_get_iova(bo) + offset;

   if (shift < 0)
      iova >>= -shift;
   else
      iova <<= shift;

   iova |= orval;

   return iova;
}
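
/* Worked example (illustrative numbers only): with fd_bo_get_iova(bo) =
 * 0x100000000, offset = 0x20, shift = -2 and orval = 0x1, the result is
 * ((0x100000000 + 0x20) >> 2) | 0x1 = 0x40000009.  Negative shift values
 * shift the address right, positive values shift it left, and orval lets
 * the caller pack extra bits into the final value.
 */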

/*
 * NOTE: OUT_RELOC() is 2 dwords (64b) on a5xx+
 */
static inline void
OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, uint32_t offset,
          uint64_t orval, int32_t shift)
{
   if (LOG_DWORDS) {
      fprintf(stderr, "ring[%p]: OUT_RELOC   %04x:  %p+%u << %d\n", ring,
              (uint32_t)(ring->cur - ring->start), bo, offset, shift);
   }
   assert(offset < fd_bo_size(bo));

   uint64_t iova = __reloc_iova(bo, offset, orval, shift);

#if FD_BO_NO_HARDPIN
   uint64_t *cur = (uint64_t *)ring->cur;
   *cur = iova;
   ring->cur += 2;
   fd_ringbuffer_assert_attached(ring, bo);
#else
   struct fd_reloc reloc = {
         .bo = bo,
         .iova = iova,
         .orval = orval,
         .offset = offset,
         .shift = shift,
   };

   fd_ringbuffer_reloc(ring, &reloc);
#endif
}
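
/* Usage sketch (hedged example; `bo` and `value` are hypothetical, and
 * CP_MEM_WRITE is used here just as an example of a packet that takes a
 * 64b address followed by data):
 *
 *    OUT_PKT7(ring, CP_MEM_WRITE, 3);
 *    OUT_RELOC(ring, bo, 0, 0, 0);   // emits the lo/hi address dwords
 *    OUT_RING(ring, value);
 */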

static inline void
OUT_RB(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
   fd_ringbuffer_emit_reloc_ring_full(ring, target, 0);
}

static inline void
BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
{
   if (unlikely(ring->cur + ndwords > ring->end))
      fd_ringbuffer_grow(ring, ndwords);
}

static inline void
OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
   BEGIN_RING(ring, cnt + 1);
   OUT_RING(ring, pm4_pkt0_hdr(regindx, cnt));
}

static inline void
OUT_PKT2(struct fd_ringbuffer *ring)
{
   BEGIN_RING(ring, 1);
   OUT_RING(ring, CP_TYPE2_PKT);
}

static inline void
OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
{
   BEGIN_RING(ring, cnt + 1);
   OUT_RING(ring, CP_TYPE3_PKT | ((cnt - 1) << 16) | ((opcode & 0xFF) << 8));
}

/*
 * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3
 */

static inline void
OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
   BEGIN_RING(ring, cnt + 1);
   OUT_RING(ring, pm4_pkt4_hdr((uint16_t)regindx, (uint16_t)cnt));
}

static inline void
OUT_PKT7(struct fd_ringbuffer *ring, uint32_t opcode, uint32_t cnt)
{
   BEGIN_RING(ring, cnt + 1);
   OUT_RING(ring, pm4_pkt7_hdr((uint8_t)opcode, (uint16_t)cnt));
}

static inline void
OUT_WFI(struct fd_ringbuffer *ring)
{
   OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
   OUT_RING(ring, 0x00000000);
}

static inline void
OUT_WFI5(struct fd_ringbuffer *ring)
{
   OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
}
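
/* Putting it together, a minimal a5xx+ emit sequence might look like this
 * (a sketch; REG_A6XX_FOO and the values are hypothetical placeholders):
 *
 *    OUT_PKT4(ring, REG_A6XX_FOO, 2);    // write 2 consecutive registers
 *    OUT_RING(ring, 0x00000001);         //   REG_A6XX_FOO
 *    OUT_RING(ring, 0x00000000);         //   REG_A6XX_FOO+1
 *
 *    OUT_WFI5(ring);                     // wait for idle before proceeding
 */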

#ifdef __cplusplus
} /* end of extern "C" */
#endif

#endif /* FREEDRENO_RINGBUFFER_H_ */