/*
 * Copyright © 2011 Marek Olšák <[email protected]>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

#ifndef AMDGPU_CS_H
#define AMDGPU_CS_H

#include "amdgpu_bo.h"
#include "util/u_memory.h"
#include "drm-uapi/amdgpu_drm.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Smaller submits mean the GPU gets busy sooner and there is less
 * waiting for buffers and fences. Proof:
 *   http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1
 */
#define IB_MAX_SUBMIT_BYTES (80 * 1024)
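
/* A minimal sketch (hypothetical helper, not part of this winsys) of how a
 * driver could apply this limit: flush once the current IB crosses the
 * threshold instead of letting the submission grow further.
 *
 *    static inline bool ib_should_flush(unsigned used_ib_bytes)
 *    {
 *       return used_ib_bytes >= IB_MAX_SUBMIT_BYTES;
 *    }
 */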

struct amdgpu_ctx {
   struct pipe_reference reference;
   struct amdgpu_winsys *aws;
   amdgpu_context_handle ctx;
   amdgpu_bo_handle user_fence_bo;
   uint64_t *user_fence_cpu_address_base;

   /* If true, report lost contexts and skip command submission.
    * If false, terminate the process.
    */
   bool allow_context_lost;

   /* Lost context status due to ioctl and allocation failures. */
   enum pipe_reset_status sw_status;
};

struct amdgpu_cs_buffer {
   struct amdgpu_winsys_bo *bo;
   unsigned usage;
};

enum ib_type {
   IB_PREAMBLE,
   IB_MAIN,
   IB_NUM,
};

struct amdgpu_ib {
   /* A buffer out of which new IBs are allocated. */
   struct pb_buffer_lean   *big_buffer;
   uint8_t                 *big_buffer_cpu_ptr;
   uint64_t                gpu_address;
   unsigned                used_ib_space;
   /* The maximum size seen in cs_check_space. If the driver calls
    * cs_check_space and then flushes, the newly allocated IB should have at
    * least this size.
    */
   unsigned                max_check_space_size;

   unsigned                max_ib_bytes;
   /* ptr_ib_size initially points to cs->csc->chunk_ib->ib_bytes.
    * If IB chaining is required in amdgpu_cs_check_space(), ptr_ib_size is
    * repointed at the size field of the indirect buffer packet.
    */
   uint32_t                *ptr_ib_size;
   bool                    is_chained_ib;
};
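
/* A rough sketch of how ptr_ib_size behaves when chaining (simplified and
 * assumed; the authoritative logic is in amdgpu_cs_check_space): when the
 * current IB runs out of space, a chaining packet is appended, the old size
 * is finalized through ptr_ib_size, and ptr_ib_size is repointed at the new
 * packet's size dword so a later flush can patch the chained IB:
 *
 *    *ib->ptr_ib_size = current_size_dw;       // close the current IB
 *    ib->ptr_ib_size = &chain_packet_size_dw;  // hypothetical patch target
 *    ib->is_chained_ib = true;
 */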

struct amdgpu_fence_list {
   struct pipe_fence_handle    **list;
   unsigned                    num;
   unsigned                    max;
};

struct amdgpu_buffer_list {
   unsigned                    max_buffers;
   unsigned                    num_buffers;
   struct amdgpu_cs_buffer     *buffers;
};

struct amdgpu_cs_context {
   struct drm_amdgpu_cs_chunk_ib chunk_ib[IB_NUM];
   uint32_t                    *ib_main_addr; /* the beginning of the IB before chaining */

   struct amdgpu_winsys *aws;

   /* Buffers. */
   struct amdgpu_buffer_list   buffer_lists[NUM_BO_LIST_TYPES];
   int16_t                     *buffer_indices_hashlist;

   struct amdgpu_winsys_bo     *last_added_bo;
   unsigned                    last_added_bo_usage;

   struct amdgpu_seq_no_fences seq_no_dependencies;

   struct amdgpu_fence_list    syncobj_dependencies;
   struct amdgpu_fence_list    syncobj_to_signal;

   struct pipe_fence_handle    *fence;

   /* The error returned from cs_flush for non-async submissions. */
   int                         error_code;

   /* TMZ: whether this submission will use the TMZ flag. */
   bool secure;
};

/* This high limit is needed for viewperf2020/catia. */
#define BUFFER_HASHLIST_SIZE 32768

struct amdgpu_cs {
   struct amdgpu_ib main_ib; /* must be first because this is inherited */
   struct amdgpu_winsys *aws;
   struct amdgpu_ctx *ctx;

   /*
    * Ensure a 64-bit alignment for drm_amdgpu_cs_chunk_fence.
    */
   struct drm_amdgpu_cs_chunk_fence fence_chunk;
   enum amd_ip_type ip_type;
   unsigned queue_index;

   /* Whether this queue uses amdgpu_winsys_bo::alt_fence instead of generating its own
    * sequence numbers for synchronization.
    */
   bool uses_alt_fence;

   /* We flip between these two CS. While one is being consumed
    * by the kernel in another thread, the other one is being filled
    * by the pipe driver (see the sketch after this struct). */
   struct amdgpu_cs_context csc1;
   struct amdgpu_cs_context csc2;
   /* The currently-used CS. */
   struct amdgpu_cs_context *csc;
   /* The CS currently owned by the other thread. */
   struct amdgpu_cs_context *cst;
   /* buffer_indices_hashlist[hash(bo)] contains the index where the bo could
    * be found in the buffer lists, or -1 if the bo isn't in any of them.
    * Since 1) hash collisions between 2 different bos can happen and 2) we
    * use a single hashlist for the 3 buffer lists, this is only a hint.
    * amdgpu_lookup_buffer uses this hint to speed up buffer lookups.
    */
   int16_t buffer_indices_hashlist[BUFFER_HASHLIST_SIZE];
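
   /* A simplified sketch of the hint-based lookup described above (the real
    * logic lives in amdgpu_lookup_buffer; names here are illustrative):
    *
    *    int16_t i = cs->buffer_indices_hashlist[hash(bo) & (BUFFER_HASHLIST_SIZE - 1)];
    *    if (i >= 0 && buffers[i].bo == bo)
    *       return &buffers[i];   // hint hit
    *    // otherwise: hash collision or stale entry -> linear search
    */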

   /* Flush CS. */
   void (*flush_cs)(void *ctx, unsigned flags, struct pipe_fence_handle **fence);
   void *flush_data;
   bool noop;
   bool has_chaining;

   struct util_queue_fence flush_completed;
   struct pipe_fence_handle *next_fence;
   struct pb_buffer_lean *preamble_ib_bo;

   struct drm_amdgpu_cs_chunk_cp_gfx_shadow mcbp_fw_shadow_chunk;
};
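
/* The csc/cst flip mentioned inside the struct, sketched (simplified; the
 * real swap happens in the flush path with proper synchronization):
 *
 *    util_queue_fence_wait(&cs->flush_completed);  // submit thread is done
 *    struct amdgpu_cs_context *tmp = cs->csc;
 *    cs->csc = cs->cst;   // refill the context the kernel is done with
 *    cs->cst = tmp;       // hand the filled one to the submit thread
 */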

struct amdgpu_fence {
   struct pipe_reference reference;
   uint32_t syncobj;

   struct amdgpu_winsys *aws;

   /* The following fields aren't set for imported fences. */
   struct amdgpu_ctx *ctx;  /* submission context */
   uint32_t ip_type;
   uint64_t *user_fence_cpu_address;
   uint64_t seq_no;

   /* Whether the fence has been submitted. This is unsignalled for deferred
    * fences (cs->next_fence) and while an IB is still being submitted in the
    * submit thread. */
   struct util_queue_fence submitted;

   volatile int signalled;              /* bool (int for atomicity) */
   bool imported;
   uint8_t queue_index;       /* for non-imported fences */
   uint_seq_no queue_seq_no;  /* winsys-generated sequence number */
};
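
/* How the user fence fields are typically consulted (a hedged sketch; the
 * authoritative check lives in amdgpu_fence_wait): the kernel writes the
 * completed sequence number into the user fence BO, so completion can be
 * polled without an ioctl:
 *
 *    if (fence->user_fence_cpu_address &&
 *        *fence->user_fence_cpu_address >= fence->seq_no)
 *       fence->signalled = true;
 */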

void amdgpu_fence_destroy(struct amdgpu_fence *fence);

static inline void amdgpu_ctx_reference(struct amdgpu_ctx **dst, struct amdgpu_ctx *src)
{
   struct amdgpu_ctx *old_dst = *dst;

   if (pipe_reference(old_dst ? &old_dst->reference : NULL,
                      src ? &src->reference : NULL)) {
      amdgpu_cs_ctx_free(old_dst->ctx);
      amdgpu_bo_cpu_unmap(old_dst->user_fence_bo);
      amdgpu_bo_free(old_dst->user_fence_bo);
      FREE(old_dst);
   }
   *dst = src;
}

static inline void amdgpu_fence_reference(struct pipe_fence_handle **dst,
                                          struct pipe_fence_handle *src)
{
   struct amdgpu_fence **adst = (struct amdgpu_fence **)dst;
   struct amdgpu_fence *asrc = (struct amdgpu_fence *)src;

   if (pipe_reference(&(*adst)->reference, &asrc->reference))
      amdgpu_fence_destroy(*adst);

   *adst = asrc;
}

/* Same as amdgpu_fence_reference, but ignore the value in *dst. */
static inline void amdgpu_fence_set_reference(struct pipe_fence_handle **dst,
                                              struct pipe_fence_handle *src)
{
   *dst = src;
   pipe_reference(NULL, &((struct amdgpu_fence *)src)->reference); /* only increment refcount */
}

/* Unreference dst, but don't assign anything. */
static inline void amdgpu_fence_drop_reference(struct pipe_fence_handle *dst)
{
   struct amdgpu_fence *adst = (struct amdgpu_fence *)dst;

   if (pipe_reference(&adst->reference, NULL)) /* only decrement refcount */
      amdgpu_fence_destroy(adst);
}

struct amdgpu_cs_buffer *
amdgpu_lookup_buffer_any_type(struct amdgpu_cs_context *cs, struct amdgpu_winsys_bo *bo);

static inline struct amdgpu_cs *
amdgpu_cs(struct radeon_cmdbuf *rcs)
{
   struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs->priv;
   assert(cs);
   return cs;
}

#define get_container(member_ptr, container_type, container_member) \
   (container_type *)((char *)(member_ptr) - offsetof(container_type, container_member))
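
/* Example use of get_container(): mapping a pointer to an embedded member
 * back to its enclosing struct, e.g. recovering the amdgpu_cs that owns an
 * amdgpu_ib:
 *
 *    struct amdgpu_ib *ib = ...;
 *    struct amdgpu_cs *cs = get_container(ib, struct amdgpu_cs, main_ib);
 */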

static inline bool
amdgpu_bo_is_referenced_by_cs(struct amdgpu_cs *cs,
                              struct amdgpu_winsys_bo *bo)
{
   return amdgpu_lookup_buffer_any_type(cs->csc, bo) != NULL;
}

static inline unsigned get_buf_list_idx(struct amdgpu_winsys_bo *bo)
{
   /* AMDGPU_BO_REAL_REUSABLE* maps to AMDGPU_BO_REAL. */
   static_assert(ARRAY_SIZE(((struct amdgpu_cs_context*)NULL)->buffer_lists) == NUM_BO_LIST_TYPES, "");
   return MIN2(bo->type, AMDGPU_BO_REAL);
}

static inline bool
amdgpu_bo_is_referenced_by_cs_with_usage(struct amdgpu_cs *cs,
                                         struct amdgpu_winsys_bo *bo,
                                         unsigned usage)
{
   struct amdgpu_cs_buffer *buffer = amdgpu_lookup_buffer_any_type(cs->csc, bo);

   return buffer && (buffer->usage & usage) != 0;
}

bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout,
                       bool absolute);
void amdgpu_cs_sync_flush(struct radeon_cmdbuf *rcs);
void amdgpu_cs_init_functions(struct amdgpu_screen_winsys *sws);

#ifdef __cplusplus
}
#endif

#endif