/*
 * Copyright © 2008 Jérôme Glisse
 * Copyright © 2011 Marek Olšák <[email protected]>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

#ifndef AMDGPU_BO_H
#define AMDGPU_BO_H

#include "amdgpu_winsys.h"
#include "pipebuffer/pb_slab.h"

#ifdef __cplusplus
extern "C" {
#endif

struct amdgpu_sparse_backing_chunk;

/*
 * Sub-allocation information for a real buffer used as backing memory of a
 * sparse buffer.
 */
struct amdgpu_sparse_backing {
   struct list_head list;

   struct amdgpu_bo_real *bo;

   /* Sorted list of free chunks. */
   struct amdgpu_sparse_backing_chunk *chunks;
   uint32_t max_chunks;
   uint32_t num_chunks;
};

struct amdgpu_sparse_commitment {
   struct amdgpu_sparse_backing *backing;
   uint32_t page;
};

enum amdgpu_bo_type {
   AMDGPU_BO_SLAB_ENTRY,
   AMDGPU_BO_SPARSE,
   AMDGPU_BO_REAL,          /* only REAL enums can be present after this */
   AMDGPU_BO_REAL_REUSABLE, /* only REAL_REUSABLE enums can be present after this */
   AMDGPU_BO_REAL_REUSABLE_SLAB,
};

/* Anything above REAL will use the BO list for REAL. */
#define NUM_BO_LIST_TYPES (AMDGPU_BO_REAL + 1)
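
/* Hedged illustration (not code quoted from this winsys): how a BO type can be mapped to its
 * BO list index, given that every REAL* type shares the list for AMDGPU_BO_REAL. MIN2 comes
 * from Mesa's util macros; the surrounding CS code is assumed, not quoted.
 *
 *    unsigned bo_list_type = MIN2(bo->type, AMDGPU_BO_REAL);
 *    assert(bo_list_type < NUM_BO_LIST_TYPES);
 */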

/* Base class of the buffer object that other structures inherit. */
struct amdgpu_winsys_bo {
   struct pb_buffer_lean base;
   enum amdgpu_bo_type type:8;
   struct amdgpu_seq_no_fences fences;

   /* Since some IPs like VCN want to have an unlimited number of queues, we can't generate our
    * own sequence numbers for those queues. Instead, each buffer has "alt_fence", an alternative
    * fence that is the last use of that buffer on any VCN queue. Any other queue that wants to
    * use the buffer has to add alt_fence as a dependency, while VCN queues replace alt_fence
    * with the newly submitted fence, so that it's always equal to the last use.
    *
    * Only VCN uses and updates alt_fence when an IB is submitted. Other IPs only use alt_fence
    * as a fence dependency. alt_fence is NULL when VCN isn't used, so there is no negative
    * impact on CPU overhead in that case. (See the illustrative sketch after this struct.)
    */
   struct pipe_fence_handle *alt_fence;

   /* This is set when a buffer is returned by buffer_create(), not when the memory is allocated
    * as part of a slab BO.
    */
   uint32_t unique_id;

   /* The number of command streams, currently being emitted in a separate
    * thread, that this BO is referenced in. */
   volatile int num_active_ioctls;
};
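
/* Hedged sketch of the alt_fence rules described above, using illustrative helper names rather
 * than the actual submission code:
 *
 *    if (bo->alt_fence)
 *       add_fence_dependency(cs, bo->alt_fence);           // every queue waits for the last VCN use
 *
 *    if (queue_is_vcn)
 *       amdgpu_fence_reference(&bo->alt_fence, new_fence);  // only VCN updates alt_fence
 */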

/* Real GPU memory allocation managed by the amdgpu kernel driver.
 *
 * There are also types of buffers that are not "real" kernel allocations, such as slab entry
 * BOs, which are suballocated from real BOs, and sparse BOs, which initially only allocate
 * the virtual address range, not memory.
 */
struct amdgpu_bo_real {
   struct amdgpu_winsys_bo b;

   amdgpu_bo_handle bo_handle;
   amdgpu_va_handle va_handle;
   void *cpu_ptr; /* for user_ptr and permanent maps */
   int map_count;
   uint32_t kms_handle;
#if MESA_DEBUG
   struct list_head global_list_item;
#endif
   simple_mtx_t map_lock;

   bool is_user_ptr;

   /* Whether buffer_get_handle or buffer_from_handle has been called;
    * it can only transition from false to true. Protected by lock.
    */
   bool is_shared;

   /* Whether this is a slab buffer and alt_fence was set on one of the slab entries. */
   bool slab_has_busy_alt_fences;
};

/* Same as amdgpu_bo_real except this BO isn't destroyed when its reference count drops to 0.
 * Instead it's cached in pb_cache for later reuse.
 */
struct amdgpu_bo_real_reusable {
   struct amdgpu_bo_real b;
   struct pb_cache_entry cache_entry;
};

/* Sparse BO. This only allocates the virtual address range for the BO. The physical storage is
 * allocated on demand by the user via radeon_winsys::buffer_commit at 64 KB granularity.
 */
struct amdgpu_bo_sparse {
   struct amdgpu_winsys_bo b;
   amdgpu_va_handle va_handle;

   uint32_t num_va_pages;
   uint32_t num_backing_pages;
   simple_mtx_t commit_lock;

   struct list_head backing;

   /* Commitment information for each page of the virtual memory area. */
   struct amdgpu_sparse_commitment *commitments;
};
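
/* Hedged usage sketch: committing physical memory to part of a sparse buffer through the winsys
 * vtable. The call below is illustrative only; the authoritative buffer_commit signature
 * (including parameter order) is defined in radeon_winsys.h.
 *
 *    // Back the first 64 KB page of the sparse buffer with real memory.
 *    rws->buffer_commit(rws, buf, 0, 64 * 1024, true);
 */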

/* Suballocated buffer using the slab allocator. This BO is only one piece of a larger buffer
 * called a slab, which is a buffer that's divided into smaller equal-sized buffers.
 */
struct amdgpu_bo_slab_entry {
   struct amdgpu_winsys_bo b;
   struct pb_slab_entry entry;
};

/* The slab buffer, which is the big backing buffer out of which smaller BOs are suballocated and
 * represented by amdgpu_bo_slab_entry. It's always a real and reusable buffer.
 */
struct amdgpu_bo_real_reusable_slab {
   struct amdgpu_bo_real_reusable b;
   struct pb_slab slab;
   struct amdgpu_bo_slab_entry *entries;
};

/* Return true if this BO is a real kernel allocation (any AMDGPU_BO_REAL* type), as opposed to
 * a slab entry or a sparse BO.
 */
static inline bool is_real_bo(struct amdgpu_winsys_bo *bo)
{
   return bo->type >= AMDGPU_BO_REAL;
}

static inline struct amdgpu_bo_real *get_real_bo(struct amdgpu_winsys_bo *bo)
{
   assert(is_real_bo(bo));
   return (struct amdgpu_bo_real*)bo;
}

static inline struct amdgpu_bo_real_reusable *get_real_bo_reusable(struct amdgpu_winsys_bo *bo)
{
   assert(bo->type >= AMDGPU_BO_REAL_REUSABLE);
   return (struct amdgpu_bo_real_reusable*)bo;
}

static inline struct amdgpu_bo_sparse *get_sparse_bo(struct amdgpu_winsys_bo *bo)
{
   assert(bo->type == AMDGPU_BO_SPARSE && bo->base.usage & RADEON_FLAG_SPARSE);
   return (struct amdgpu_bo_sparse*)bo;
}

static inline struct amdgpu_bo_slab_entry *get_slab_entry_bo(struct amdgpu_winsys_bo *bo)
{
   assert(bo->type == AMDGPU_BO_SLAB_ENTRY);
   return (struct amdgpu_bo_slab_entry*)bo;
}

/* Return the slab BO that contains the given pb_slab. */
static inline struct amdgpu_bo_real_reusable_slab *get_bo_from_slab(struct pb_slab *slab)
{
   return container_of(slab, struct amdgpu_bo_real_reusable_slab, slab);
}

/* Return the real BO that a slab entry is suballocated from. */
static inline struct amdgpu_bo_real *get_slab_entry_real_bo(struct amdgpu_winsys_bo *bo)
{
   assert(bo->type == AMDGPU_BO_SLAB_ENTRY);
   return &get_bo_from_slab(((struct amdgpu_bo_slab_entry*)bo)->entry.slab)->b.b;
}

static inline struct amdgpu_bo_real_reusable_slab *get_real_bo_reusable_slab(struct amdgpu_winsys_bo *bo)
{
   assert(bo->type == AMDGPU_BO_REAL_REUSABLE_SLAB);
   return (struct amdgpu_bo_real_reusable_slab*)bo;
}

/* Given a sequence number "fences->seq_no[queue_index]", return a pointer to the corresponding
 * non-NULL fence slot in the queue ring. If the fence is not present in the ring (= the buffer
 * is idle on that queue), return NULL. If a non-NULL pointer is returned and the caller finds
 * the fence to be idle, it's recommended to use the returned pointer to set the fence to NULL
 * in the ring, which is why we return a pointer to a pointer.
 */
static inline struct pipe_fence_handle **
get_fence_from_ring(struct amdgpu_winsys *aws, struct amdgpu_seq_no_fences *fences,
                    unsigned queue_index)
{
   /* The caller should check if the BO has a fence. */
   assert(queue_index < AMDGPU_MAX_QUEUES);
   assert(fences->valid_fence_mask & BITFIELD_BIT(queue_index));

   uint_seq_no buffer_seq_no = fences->seq_no[queue_index];
   uint_seq_no latest_seq_no = aws->queues[queue_index].latest_seq_no;
   bool fence_present = latest_seq_no - buffer_seq_no < AMDGPU_FENCE_RING_SIZE;

   if (fence_present) {
      struct pipe_fence_handle **fence =
         &aws->queues[queue_index].fences[buffer_seq_no % AMDGPU_FENCE_RING_SIZE];

      if (*fence)
         return fence;
   }

   /* If the sequence number references a fence that is not present, it's guaranteed to be idle
    * because the winsys always waits for the oldest fence when it removes it from the ring.
    */
   fences->valid_fence_mask &= ~BITFIELD_BIT(queue_index);
   return NULL;
}
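
/* Hedged caller sketch (not code from this file): check whether a BO is still busy on one queue
 * and, if its fence has signaled, clear the ring slot through the returned pointer. The fence
 * helpers named here are assumed from the amdgpu winsys, and "queue_index" must have its bit set
 * in bo->fences.valid_fence_mask.
 *
 *    struct pipe_fence_handle **fence = get_fence_from_ring(aws, &bo->fences, queue_index);
 *
 *    if (fence && amdgpu_fence_wait(*fence, 0, false))
 *       amdgpu_fence_reference(fence, NULL);   // idle: drop the fence from the ring
 */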

static inline uint_seq_no pick_latest_seq_no(struct amdgpu_winsys *aws, unsigned queue_index,
                                             uint_seq_no n1, uint_seq_no n2)
{
   uint_seq_no latest = aws->queues[queue_index].latest_seq_no;

   /* Since sequence numbers can wrap around, we need to pick the later number that's logically
    * before "latest". The trick is to subtract "latest + 1" so that the unsigned subtraction
    * wraps around, "latest" becomes UINT*_MAX, and we can just return the maximum.
    */
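   /* Hypothetical worked example (8-bit wraparound chosen only for brevity): with latest = 5,
    * n1 = 4, n2 = 250 (submitted before the wraparound), we get s1 = 4 - 6 = 254 and
    * s2 = 250 - 6 = 244, so n1 is correctly picked as the more recent sequence number.
    */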
   uint_seq_no s1 = n1 - latest - 1;
   uint_seq_no s2 = n2 - latest - 1;

   return s1 >= s2 ? n1 : n2;
}

/* Add a fence sequence number for the given queue to the BO fence list, keeping whichever
 * sequence number is more recent if one is already present.
 */
static inline void add_seq_no_to_list(struct amdgpu_winsys *aws, struct amdgpu_seq_no_fences *fences,
                                      unsigned queue_index, uint_seq_no seq_no)
{
   if (fences->valid_fence_mask & BITFIELD_BIT(queue_index)) {
      fences->seq_no[queue_index] = pick_latest_seq_no(aws, queue_index, seq_no,
                                                       fences->seq_no[queue_index]);
   } else {
      fences->seq_no[queue_index] = seq_no;
      fences->valid_fence_mask |= BITFIELD_BIT(queue_index);
   }
}

bool amdgpu_bo_can_reclaim(struct amdgpu_winsys *aws, struct pb_buffer_lean *_buf);
struct pb_buffer_lean *amdgpu_bo_create(struct amdgpu_winsys *aws,
                                        uint64_t size,
                                        unsigned alignment,
                                        enum radeon_bo_domain domain,
                                        enum radeon_bo_flag flags);
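
/* Hedged usage sketch for amdgpu_bo_create; the size, alignment, domain, and flag values below
 * are illustrative, taken from the radeon_winsys enums:
 *
 *    struct pb_buffer_lean *buf =
 *       amdgpu_bo_create(aws, 64 * 1024, 4096, RADEON_DOMAIN_VRAM, RADEON_FLAG_NO_CPU_ACCESS);
 */
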
void amdgpu_bo_destroy(struct amdgpu_winsys *aws, struct pb_buffer_lean *_buf);
void *amdgpu_bo_map(struct radeon_winsys *rws,
                    struct pb_buffer_lean *buf,
                    struct radeon_cmdbuf *rcs,
                    enum pipe_map_flags usage);
void amdgpu_bo_unmap(struct radeon_winsys *rws, struct pb_buffer_lean *buf);
void amdgpu_bo_init_functions(struct amdgpu_screen_winsys *sws);

bool amdgpu_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry);
struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap, unsigned entry_size,
                                     unsigned group_index);
void amdgpu_bo_slab_free(struct amdgpu_winsys *aws, struct pb_slab *slab);
uint64_t amdgpu_bo_get_va(struct pb_buffer_lean *buf);

static inline struct amdgpu_winsys_bo *
amdgpu_winsys_bo(struct pb_buffer_lean *bo)
{
   return (struct amdgpu_winsys_bo *)bo;
}

static inline void
amdgpu_winsys_bo_reference(struct amdgpu_winsys *aws, struct amdgpu_winsys_bo **dst,
                           struct amdgpu_winsys_bo *src)
{
   radeon_bo_reference(&aws->dummy_sws.base,
                       (struct pb_buffer_lean**)dst, (struct pb_buffer_lean*)src);
}

/* Same as amdgpu_winsys_bo_reference, but ignore the value in *dst. */
static inline void
amdgpu_winsys_bo_set_reference(struct amdgpu_winsys_bo **dst, struct amdgpu_winsys_bo *src)
{
   radeon_bo_set_reference((struct pb_buffer_lean**)dst, (struct pb_buffer_lean*)src);
}

/* Unreference dst, but don't assign anything. */
static inline void
amdgpu_winsys_bo_drop_reference(struct amdgpu_winsys *aws, struct amdgpu_winsys_bo *dst)
{
   radeon_bo_drop_reference(&aws->dummy_sws.base, &dst->base);
}
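
/* Hedged usage sketch of the reference helpers above (the holder pattern is plain refcounting;
 * nothing here is quoted from driver code):
 *
 *    struct amdgpu_winsys_bo *holder = NULL;
 *
 *    amdgpu_winsys_bo_reference(aws, &holder, bo);    // unrefs the old holder, refs bo
 *    ...
 *    amdgpu_winsys_bo_reference(aws, &holder, NULL);  // release the reference
 */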

#ifdef __cplusplus
}
#endif

#endif