/*
 * Copyright (c) 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * The aux map provides a multi-level lookup of the main surface address which
 * ends up providing information about the auxiliary surface data, including
 * the address where the auxiliary data resides.
 *
 * The sections below depict the address splitting and the table-entry formats
 * for the TGL platform. These may vary on other platforms.
 *
 * The 48-bit VMA (GPU) address of the main surface is split to do the address
 * lookup:
 *
 *  48 bit address of main surface
 * +--------+--------+--------+------+
 * | 47:36  | 35:24  | 23:16  | 15:0 |
 * | L3-idx | L2-idx | L1-idx | ...  |
 * +--------+--------+--------+------+
 *
 * The GFX_AUX_TABLE_BASE_ADDR points to a buffer. The L3 Table Entry is
 * located by indexing into this buffer as a uint64_t array using the L3-idx
 * value. The 64-bit L3 entry is defined as:
 *
 * +-------+-------------+------+---+
 * | 63:48 | 47:15       | 14:1 | 0 |
 * | ...   | L2-tbl-addr | ...  | V |
 * +-------+-------------+------+---+
 *
 * If the `V` (valid) bit is set, then the L2-tbl-addr gives the address for
 * the level-2 table entries, with the lower address bits filled with zero.
 * The L2 Table Entry is located by indexing into that table as a uint64_t
 * array using the L2-idx value. The 64-bit L2 entry is similar to the L3
 * entry, except with 2 additional address bits:
 *
 * +-------+-------------+------+---+
 * | 63:48 | 47:13       | 12:1 | 0 |
 * | ...   | L1-tbl-addr | ...  | V |
 * +-------+-------------+------+---+
 *
 * If the `V` bit is set, then the L1-tbl-addr gives the address for the
 * level-1 table entries, with the lower address bits filled with zero. The L1
 * Table Entry is located by indexing into that table as a uint64_t array
 * using the L1-idx value. The 64-bit L1 entry is defined as:
 *
 * +--------+------+-------+-------+-------+---------------+-----+---+
 * | 63:58  | 57   | 56:54 | 53:52 | 51:48 | 47:8          | 7:1 | 0 |
 * | Format | Y/Cr | Depth | TM    | ...   | aux-data-addr | ... | V |
 * +--------+------+-------+-------+-------+---------------+-----+---+
 *
 * Where:
 *  - Format: See `isl_format_get_aux_map_encoding`
 *  - Y/Cr: 0=Y(Luma), 1=Cr(Chroma)
 *  - (bit) Depth: See `get_bpp_encoding`
 *  - TM (Tile-mode): 0=Ys, 1=Y, 2=rsvd, 3=rsvd
 *  - aux-data-addr: VMA/GPU address for the aux-data
 *  - V: entry is valid
 *
 * BSpec 44930
 */
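
/* A concrete walk-through of the TGL split above (plain arithmetic on the
 * diagrams, added only for illustration): for a main surface page at VMA
 * 0x123456780000,
 *
 *    L3-idx = (0x123456780000 >> 36) & 0xfff = 0x123
 *    L2-idx = (0x123456780000 >> 24) & 0xfff = 0x456
 *    L1-idx = (0x123456780000 >> 16) & 0xff  = 0x78
 *
 * so the lookup reads entry 0x123 of the L3 table at GFX_AUX_TABLE_BASE_ADDR,
 * follows its L2-tbl-addr to entry 0x456 of the L2 table, follows that
 * entry's L1-tbl-addr to entry 0x78 of the L1 table, and (if `V` is set)
 * finds the aux-data-addr for that 64KB main-surface page.
 */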

#include "intel_aux_map.h"
#include "intel_gem.h"

#include "dev/intel_device_info.h"
#include "isl/isl.h"

#include "util/list.h"
#include "util/ralloc.h"
#include "util/u_atomic.h"
#include "util/u_math.h"

#include <inttypes.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <pthread.h>

#define INTEL_AUX_MAP_FORMAT_BITS_MASK 0xfff0000000000000ull

/* Mask with the first 48 bits set */
#define VALID_ADDRESS_MASK ((1ull << 48) - 1)

#define L3_ENTRY_L2_ADDR_MASK 0xffffffff8000ull

#define L3_L2_BITS_PER_LEVEL 12
#define L3_L2_SUB_TABLE_LEN (sizeof(uint64_t) * (1ull << L3_L2_BITS_PER_LEVEL))

static const bool aux_map_debug = false;

/**
 * Auxiliary surface mapping formats
 *
 * Several AUX mapping formats exist. The supported formats are designated
 * here by generation and granularity. A device can support more than one
 * format, depending on the hardware, but we expect only one of them to be
 * needed on a given device. Otherwise, this enum would have to be turned
 * into a bitmask.
 */
enum intel_aux_map_format {
   /**
    * 64KB granularity format on GFX12 devices
    */
   INTEL_AUX_MAP_GFX12_64KB = 0,

   /**
    * 1MB granularity format on GFX125 devices
    */
   INTEL_AUX_MAP_GFX125_1MB,

   INTEL_AUX_MAP_LAST,
};

/**
 * An incomplete description of AUX mapping formats
 *
 * Theoretically, many things can differ depending on the hardware design:
 * the number of page-table levels, the address splitting, the format bits,
 * etc. For now we only track the known deltas, to keep the implementation
 * simple.
 */
struct aux_format_info {
   /**
    * Granularity of the main surface in compression. It must be a power of 2.
    */
   uint64_t main_page_size;
   /**
    * Page size of the level-1 page table. It must be a power of 2.
    */
   uint64_t l1_page_size;
   /**
    * Mask of the level-1 page table index bits in the address splitting.
    */
   uint64_t l1_index_mask;
   /**
    * Offset of the level-1 page table index bits in the address splitting.
    */
   uint64_t l1_index_offset;
};

static const struct aux_format_info aux_formats[] = {
   [INTEL_AUX_MAP_GFX12_64KB] = {
      .main_page_size = 64 * 1024,
      .l1_page_size = 8 * 1024,
      .l1_index_mask = 0xff,
      .l1_index_offset = 16,
   },
   [INTEL_AUX_MAP_GFX125_1MB] = {
      .main_page_size = 1024 * 1024,
      .l1_page_size = 2 * 1024,
      .l1_index_mask = 0xf,
      .l1_index_offset = 20,
   },
};
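
/* For illustration (derived from the table above, not an additional hardware
 * requirement): with INTEL_AUX_MAP_GFX12_64KB the L1 index of a main address
 * is (addr >> 16) & 0xff, matching the bits-23:16 L1-idx field in the TGL
 * diagram in the header comment, while INTEL_AUX_MAP_GFX125_1MB takes
 * (addr >> 20) & 0xf, i.e. 16 L1 entries each covering a 1MB main page.
 */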

struct aux_map_buffer {
   struct list_head link;
   struct intel_buffer *buffer;
};

struct intel_aux_level {
   /* GPU address of the current level */
   uint64_t address;

   /* Pointer to the GPU entries of this level */
   uint64_t *entries;

   union {
      /* Host tracking of a parent level to its children (only used on the
       * L3/L2 levels, which have 4096 entries)
       */
      struct intel_aux_level *children[4096];

      /* Refcount of AUX pages at the L1 level (MTL has only 16 entries in
       * L1, while Gfx12 has 256 entries)
       */
      uint32_t ref_counts[256];
   };
};

struct intel_aux_map_context {
   void *driver_ctx;
   pthread_mutex_t mutex;
   struct intel_aux_level *l3_level;
   struct intel_mapped_pinned_buffer_alloc *buffer_alloc;
   uint32_t num_buffers;
   struct list_head buffers;
   uint32_t tail_offset, tail_remaining;
   uint32_t state_num;
   const struct aux_format_info *format;
};

static inline uint64_t
get_page_mask(const uint64_t page_size)
{
   return page_size - 1;
}

static inline uint64_t
get_meta_page_size(const struct aux_format_info *info)
{
   return info->main_page_size / INTEL_AUX_MAP_MAIN_SIZE_SCALEDOWN;
}

static inline uint64_t
get_index(const uint64_t main_address,
          const uint64_t index_mask, const uint64_t index_offset)
{
   return (main_address >> index_offset) & index_mask;
}

uint64_t
intel_aux_get_meta_address_mask(struct intel_aux_map_context *ctx)
{
   return (~get_page_mask(get_meta_page_size(ctx->format))) & VALID_ADDRESS_MASK;
}
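
/* A quick sanity check of the mask above (illustrative arithmetic only,
 * assuming the GFX12 64KB format with a 256:1 main-to-aux scaledown): the
 * meta page size is 64KB / 256 = 256B, so the function returns
 * ~0xffull & VALID_ADDRESS_MASK = 0x0000ffffffffff00, selecting bits 47:8
 * of the aux-data address, which matches the L1-entry layout in the header
 * comment.
 */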

uint64_t
intel_aux_main_to_aux_offset(struct intel_aux_map_context *ctx,
                             uint64_t main_offset)
{
   return main_offset / INTEL_AUX_MAP_MAIN_SIZE_SCALEDOWN;
}

static const struct aux_format_info *
get_format(enum intel_aux_map_format format)
{
   assert(format < INTEL_AUX_MAP_LAST);
   assert(ARRAY_SIZE(aux_formats) == INTEL_AUX_MAP_LAST);
   return &aux_formats[format];
}

static enum intel_aux_map_format
select_format(const struct intel_device_info *devinfo)
{
   if (devinfo->verx10 >= 125)
      return INTEL_AUX_MAP_GFX125_1MB;
   else if (devinfo->verx10 == 120)
      return INTEL_AUX_MAP_GFX12_64KB;
   else
      return INTEL_AUX_MAP_LAST;
}

static bool
add_buffer(struct intel_aux_map_context *ctx)
{
   struct aux_map_buffer *buf = rzalloc(ctx, struct aux_map_buffer);
   if (!buf)
      return false;

   const uint32_t size = 0x100000;
   buf->buffer = ctx->buffer_alloc->alloc(ctx->driver_ctx, size);
   if (!buf->buffer) {
      ralloc_free(buf);
      return false;
   }

   assert(buf->buffer->map != NULL);

   list_addtail(&buf->link, &ctx->buffers);
   ctx->tail_offset = 0;
   ctx->tail_remaining = size;
   p_atomic_inc(&ctx->num_buffers);

   return true;
}

static void
advance_current_pos(struct intel_aux_map_context *ctx, uint32_t size)
{
   assert(ctx->tail_remaining >= size);
   ctx->tail_remaining -= size;
   ctx->tail_offset += size;
}

static bool
align_and_verify_space(struct intel_aux_map_context *ctx, uint32_t size,
                       uint32_t alignment)
{
   if (ctx->tail_remaining < size)
      return false;

   struct aux_map_buffer *tail =
      list_last_entry(&ctx->buffers, struct aux_map_buffer, link);
   uint64_t gpu = tail->buffer->gpu + ctx->tail_offset;
   uint64_t aligned = align64(gpu, alignment);

   if ((aligned - gpu) + size > ctx->tail_remaining) {
      return false;
   } else {
      if (aligned - gpu > 0)
         advance_current_pos(ctx, aligned - gpu);
      return true;
   }
}

static void
get_current_pos(struct intel_aux_map_context *ctx, uint64_t *gpu, uint64_t **map)
{
   assert(!list_is_empty(&ctx->buffers));
   struct aux_map_buffer *tail =
      list_last_entry(&ctx->buffers, struct aux_map_buffer, link);
   if (gpu)
      *gpu = tail->buffer->gpu + ctx->tail_offset;
   if (map)
      *map = (uint64_t*)((uint8_t*)tail->buffer->map + ctx->tail_offset);
}

static struct intel_aux_level *
add_sub_table(struct intel_aux_map_context *ctx,
              struct intel_aux_level *parent,
              uint32_t parent_index,
              uint32_t size, uint32_t align)
{
   if (!align_and_verify_space(ctx, size, align)) {
      if (!add_buffer(ctx))
         return NULL;
      UNUSED bool aligned = align_and_verify_space(ctx, size, align);
      assert(aligned);
   }

   struct intel_aux_level *level = rzalloc(ctx, struct intel_aux_level);

   get_current_pos(ctx, &level->address, &level->entries);
   memset(level->entries, 0, size);
   advance_current_pos(ctx, size);

   if (parent != NULL) {
      assert(parent->children[parent_index] == NULL);
      parent->children[parent_index] = level;
   }

   return level;
}

uint32_t
intel_aux_map_get_state_num(struct intel_aux_map_context *ctx)
{
   return p_atomic_read(&ctx->state_num);
}

struct intel_aux_map_context *
intel_aux_map_init(void *driver_ctx,
                   struct intel_mapped_pinned_buffer_alloc *buffer_alloc,
                   const struct intel_device_info *devinfo)
{
   struct intel_aux_map_context *ctx;

   enum intel_aux_map_format format = select_format(devinfo);
   if (format == INTEL_AUX_MAP_LAST)
      return NULL;

   ctx = ralloc(NULL, struct intel_aux_map_context);
   if (!ctx)
      return NULL;

   if (pthread_mutex_init(&ctx->mutex, NULL)) {
      ralloc_free(ctx);
      return NULL;
   }

   ctx->format = get_format(format);
   ctx->driver_ctx = driver_ctx;
   ctx->buffer_alloc = buffer_alloc;
   ctx->num_buffers = 0;
   list_inithead(&ctx->buffers);
   ctx->tail_offset = 0;
   ctx->tail_remaining = 0;
   ctx->state_num = 0;

   ctx->l3_level = add_sub_table(ctx, NULL, 0,
                                 L3_L2_SUB_TABLE_LEN, L3_L2_SUB_TABLE_LEN);
   if (ctx->l3_level != NULL) {
      if (aux_map_debug)
         fprintf(stderr, "AUX-MAP L3: 0x%"PRIx64", map=%p\n",
                 ctx->l3_level->address, ctx->l3_level->entries);
      p_atomic_inc(&ctx->state_num);
      return ctx;
   } else {
      ralloc_free(ctx);
      return NULL;
   }
}

void
intel_aux_map_finish(struct intel_aux_map_context *ctx)
{
   if (!ctx)
      return;

   pthread_mutex_destroy(&ctx->mutex);
   list_for_each_entry_safe(struct aux_map_buffer, buf, &ctx->buffers, link) {
      ctx->buffer_alloc->free(ctx->driver_ctx, buf->buffer);
      list_del(&buf->link);
      p_atomic_dec(&ctx->num_buffers);
      ralloc_free(buf);
   }

   ralloc_free(ctx);
}

uint32_t
intel_aux_map_get_alignment(struct intel_aux_map_context *ctx)
{
   return ctx->format->main_page_size;
}

uint64_t
intel_aux_map_get_base(struct intel_aux_map_context *ctx)
{
   /**
    * This gets initialized in intel_aux_map_init, and never changes, so
    * there is no need to lock the mutex.
    */
   return ctx->l3_level->address;
}

static uint8_t
get_bpp_encoding(enum isl_format format)
{
   if (isl_format_is_yuv(format)) {
      switch (format) {
      case ISL_FORMAT_YCRCB_NORMAL:
      case ISL_FORMAT_YCRCB_SWAPY:
      case ISL_FORMAT_PLANAR_420_8: return 3;
      case ISL_FORMAT_PLANAR_420_12: return 2;
      case ISL_FORMAT_PLANAR_420_10: return 1;
      case ISL_FORMAT_PLANAR_420_16: return 0;
      default:
         unreachable("Unsupported format!");
         return 0;
      }
   } else {
      switch (isl_format_get_layout(format)->bpb) {
      case 16: return 0;
      case 8: return 4;
      case 32: return 5;
      case 64: return 6;
      case 128: return 7;
      default:
         unreachable("Unsupported bpp!");
         return 0;
      }
   }
}

#define INTEL_AUX_MAP_ENTRY_Ys_TILED_BIT (0x0ull << 52)
#define INTEL_AUX_MAP_ENTRY_Y_TILED_BIT  (0x1ull << 52)

uint64_t
intel_aux_map_format_bits(enum isl_tiling tiling, enum isl_format format,
                          uint8_t plane)
{
   /* Gfx12.5+ uses Tile4 rather than Y-tiling; it also takes the compression
    * information from the surface state and ignores the aux-map format bits
    * metadata.
    */
   if (!isl_tiling_is_any_y(tiling))
      return 0;

   if (aux_map_debug)
      fprintf(stderr, "AUX-MAP entry %s, format_enc=%d\n",
              isl_format_get_name(format),
              isl_format_get_aux_map_encoding(format));

   assert(tiling == ISL_TILING_ICL_Ys ||
          tiling == ISL_TILING_ICL_Yf ||
          tiling == ISL_TILING_Y0);

   uint64_t format_bits =
      ((uint64_t)isl_format_get_aux_map_encoding(format) << 58) |
      ((uint64_t)(plane > 0) << 57) |
      ((uint64_t)get_bpp_encoding(format) << 54) |
      /* TODO: We assume that Yf is not Tiled-Ys, but waiting on
       * clarification
       */
      (tiling == ISL_TILING_ICL_Ys ? INTEL_AUX_MAP_ENTRY_Ys_TILED_BIT :
                                     INTEL_AUX_MAP_ENTRY_Y_TILED_BIT);

   assert((format_bits & INTEL_AUX_MAP_FORMAT_BITS_MASK) == format_bits);

   return format_bits;
}

uint64_t
intel_aux_map_format_bits_for_isl_surf(const struct isl_surf *isl_surf)
{
   assert(!isl_format_is_planar(isl_surf->format));
   return intel_aux_map_format_bits(isl_surf->tiling, isl_surf->format, 0);
}

static uint64_t
get_l1_addr_mask(struct intel_aux_map_context *ctx)
{
   uint64_t l1_addr = ~get_page_mask(ctx->format->l1_page_size);
   return l1_addr & VALID_ADDRESS_MASK;
}

static void
get_aux_entry(struct intel_aux_map_context *ctx, uint64_t main_address,
              uint32_t *l1_index_out, uint64_t *l1_entry_addr_out,
              uint64_t **l1_entry_map_out,
              struct intel_aux_level **l1_aux_level_out)
{
   struct intel_aux_level *l3_level = ctx->l3_level;
   struct intel_aux_level *l2_level;
   struct intel_aux_level *l1_level;

   uint32_t l3_index = (main_address >> 36) & 0xfff;

   if (l3_level->children[l3_index] == NULL) {
      l2_level =
         add_sub_table(ctx, ctx->l3_level, l3_index,
                       L3_L2_SUB_TABLE_LEN, L3_L2_SUB_TABLE_LEN);
      if (l2_level != NULL) {
         if (aux_map_debug)
            fprintf(stderr, "AUX-MAP L3[0x%x]: 0x%"PRIx64", map=%p\n",
                    l3_index, l2_level->address, l2_level->entries);
      } else {
         unreachable("Failed to add L2 Aux-Map Page Table!");
      }
      l3_level->entries[l3_index] = (l2_level->address & L3_ENTRY_L2_ADDR_MASK) |
                                    INTEL_AUX_MAP_ENTRY_VALID_BIT;
   } else {
      l2_level = l3_level->children[l3_index];
   }
   uint32_t l2_index = (main_address >> 24) & 0xfff;
   uint64_t l1_page_size = ctx->format->l1_page_size;
   if (l2_level->children[l2_index] == NULL) {
      l1_level = add_sub_table(ctx, l2_level, l2_index, l1_page_size,
                               l1_page_size);
      if (l1_level != NULL) {
         if (aux_map_debug)
            fprintf(stderr, "AUX-MAP L2[0x%x]: 0x%"PRIx64", map=%p\n",
                    l2_index, l1_level->address, l1_level->entries);
      } else {
         unreachable("Failed to add L1 Aux-Map Page Table!");
      }
      l2_level->entries[l2_index] = (l1_level->address & get_l1_addr_mask(ctx)) |
                                    INTEL_AUX_MAP_ENTRY_VALID_BIT;
   } else {
      l1_level = l2_level->children[l2_index];
   }
   uint32_t l1_index = get_index(main_address, ctx->format->l1_index_mask,
                                 ctx->format->l1_index_offset);
   if (l1_index_out)
      *l1_index_out = l1_index;
   if (l1_entry_addr_out)
      *l1_entry_addr_out =
         intel_canonical_address(l1_level->address + l1_index * sizeof(uint64_t));
   if (l1_entry_map_out)
      *l1_entry_map_out = &l1_level->entries[l1_index];
   if (l1_aux_level_out)
      *l1_aux_level_out = l1_level;
}

static bool
add_mapping(struct intel_aux_map_context *ctx, uint64_t main_address,
            uint64_t aux_address, uint64_t format_bits,
            bool *state_changed)
{
   if (aux_map_debug)
      fprintf(stderr, "AUX-MAP 0x%"PRIx64" => 0x%"PRIx64"\n", main_address,
              aux_address);

   uint32_t l1_index;
   uint64_t *l1_entry;
   struct intel_aux_level *l1_aux_level;
   get_aux_entry(ctx, main_address, &l1_index, NULL, &l1_entry, &l1_aux_level);

   const uint64_t l1_data =
      (aux_address & intel_aux_get_meta_address_mask(ctx)) |
      format_bits |
      INTEL_AUX_MAP_ENTRY_VALID_BIT;

   const uint64_t current_l1_data = *l1_entry;
   if ((current_l1_data & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) {
      assert(l1_aux_level->ref_counts[l1_index] == 0);
      assert((aux_address & 0xffULL) == 0);
      if (aux_map_debug)
         fprintf(stderr, "AUX-MAP L1[0x%x] 0x%"PRIx64" -> 0x%"PRIx64"\n",
                 l1_index, current_l1_data, l1_data);
      /**
       * We use non-zero bits in 63:1 to indicate the entry had been filled
       * previously. If these bits are non-zero and they don't exactly match
       * what we want to program into the entry, then we must force the
       * aux-map tables to be flushed.
       */
      if (current_l1_data != 0 &&
          (current_l1_data | INTEL_AUX_MAP_ENTRY_VALID_BIT) != l1_data)
         *state_changed = true;
      *l1_entry = l1_data;
   } else {
      if (aux_map_debug)
         fprintf(stderr, "AUX-MAP L1[0x%x] is already marked valid!\n",
                 l1_index);

      if (*l1_entry != l1_data) {
         if (aux_map_debug)
            fprintf(stderr,
                    "AUX-MAP L1[0x%x] overwrite 0x%"PRIx64" != 0x%"PRIx64"\n",
                    l1_index, current_l1_data, l1_data);

         return false;
      }
   }

   l1_aux_level->ref_counts[l1_index]++;

   return true;
}

uint64_t *
intel_aux_map_get_entry(struct intel_aux_map_context *ctx,
                        uint64_t main_address,
                        uint64_t *aux_entry_address)
{
   pthread_mutex_lock(&ctx->mutex);
   uint64_t *l1_entry_map;
   get_aux_entry(ctx, main_address, NULL, aux_entry_address, &l1_entry_map, NULL);
   pthread_mutex_unlock(&ctx->mutex);

   return l1_entry_map;
}

/**
 * We mark the leaf entry as invalid, but we don't attempt to clean up the
 * other levels of translation mappings. Since we attempt to re-use VMA
 * ranges, hopefully this will not lead to unbounded growth of the
 * translation tables.
 */
static void
remove_l1_mapping_locked(struct intel_aux_map_context *ctx, uint64_t main_address,
                         bool reset_refcount, bool *state_changed)
{
   uint32_t l1_index;
   uint64_t *l1_entry;
   struct intel_aux_level *l1_aux_level;
   get_aux_entry(ctx, main_address, &l1_index, NULL, &l1_entry, &l1_aux_level);

   const uint64_t current_l1_data = *l1_entry;
   const uint64_t l1_data = current_l1_data & ~INTEL_AUX_MAP_ENTRY_VALID_BIT;

   if ((current_l1_data & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) {
      assert(l1_aux_level->ref_counts[l1_index] == 0);
      return;
   } else if (reset_refcount) {
      l1_aux_level->ref_counts[l1_index] = 0;
      if (unlikely(l1_data == 0))
         *state_changed = true;
      *l1_entry = l1_data;
   } else {
      assert(l1_aux_level->ref_counts[l1_index] > 0);
      if (--l1_aux_level->ref_counts[l1_index] == 0) {
         /**
          * We use non-zero bits in 63:1 to indicate the entry had been
          * filled previously. In the unlikely event that these are all
          * zero, we force a flush of the aux-map tables.
          */
         if (unlikely(l1_data == 0))
            *state_changed = true;
         *l1_entry = l1_data;
      }
   }
}

static void
remove_mapping_locked(struct intel_aux_map_context *ctx, uint64_t main_address,
                      uint64_t size, bool reset_refcount, bool *state_changed)
{
   if (aux_map_debug)
      fprintf(stderr, "AUX-MAP remove 0x%"PRIx64"-0x%"PRIx64"\n", main_address,
              main_address + size);

   uint64_t main_inc_addr = main_address;
   uint64_t main_page_size = ctx->format->main_page_size;
   assert((main_address & get_page_mask(main_page_size)) == 0);
   while (main_inc_addr - main_address < size) {
      remove_l1_mapping_locked(ctx, main_inc_addr, reset_refcount,
                               state_changed);
      main_inc_addr += main_page_size;
   }
}

bool
intel_aux_map_add_mapping(struct intel_aux_map_context *ctx, uint64_t main_address,
                          uint64_t aux_address, uint64_t main_size_B,
                          uint64_t format_bits)
{
   bool state_changed = false;
   pthread_mutex_lock(&ctx->mutex);
   uint64_t main_inc_addr = main_address;
   uint64_t aux_inc_addr = aux_address;
   const uint64_t main_page_size = ctx->format->main_page_size;
   assert((main_address & get_page_mask(main_page_size)) == 0);
   const uint64_t aux_page_size = get_meta_page_size(ctx->format);
   assert((aux_address & get_page_mask(aux_page_size)) == 0);
   while (main_inc_addr - main_address < main_size_B) {
      if (!add_mapping(ctx, main_inc_addr, aux_inc_addr, format_bits,
                       &state_changed)) {
         break;
      }
      main_inc_addr = main_inc_addr + main_page_size;
      aux_inc_addr = aux_inc_addr + aux_page_size;
   }
   bool success = main_inc_addr - main_address >= main_size_B;
   if (!success && (main_inc_addr > main_address)) {
      /* If the mapping failed, remove the mapped portion. */
      remove_mapping_locked(ctx, main_address,
                            main_inc_addr - main_address,
                            false /* reset_refcount */, &state_changed);
   }
   pthread_mutex_unlock(&ctx->mutex);
   if (state_changed)
      p_atomic_inc(&ctx->state_num);

   return success;
}
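
/* A sketch of typical driver usage of the mapping API above (illustrative
 * only; "main_bo"/"aux_bo" and their fields are hypothetical driver objects,
 * not part of this file):
 *
 *    uint64_t bits = intel_aux_map_format_bits_for_isl_surf(&surf);
 *    if (intel_aux_map_add_mapping(ctx, main_bo->gpu_addr, aux_bo->gpu_addr,
 *                                  surf.size_B, bits)) {
 *       ... use the surface with compression ...
 *       intel_aux_map_del_mapping(ctx, main_bo->gpu_addr, surf.size_B);
 *    }
 *
 * Both addresses must be aligned to the format's page sizes, as the asserts
 * in intel_aux_map_add_mapping enforce.
 */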

void
intel_aux_map_del_mapping(struct intel_aux_map_context *ctx, uint64_t main_address,
                          uint64_t size)
{
   bool state_changed = false;
   pthread_mutex_lock(&ctx->mutex);
   remove_mapping_locked(ctx, main_address, size, false /* reset_refcount */,
                         &state_changed);
   pthread_mutex_unlock(&ctx->mutex);
   if (state_changed)
      p_atomic_inc(&ctx->state_num);
}

void
intel_aux_map_unmap_range(struct intel_aux_map_context *ctx, uint64_t main_address,
                          uint64_t size)
{
   bool state_changed = false;
   pthread_mutex_lock(&ctx->mutex);
   remove_mapping_locked(ctx, main_address, size, true /* reset_refcount */,
                         &state_changed);
   pthread_mutex_unlock(&ctx->mutex);
   if (state_changed)
      p_atomic_inc(&ctx->state_num);
}

uint32_t
intel_aux_map_get_num_buffers(struct intel_aux_map_context *ctx)
{
   return p_atomic_read(&ctx->num_buffers);
}

void
intel_aux_map_fill_bos(struct intel_aux_map_context *ctx, void **driver_bos,
                       uint32_t max_bos)
{
   assert(p_atomic_read(&ctx->num_buffers) >= max_bos);
   uint32_t i = 0;
   list_for_each_entry(struct aux_map_buffer, buf, &ctx->buffers, link) {
      if (i >= max_bos)
         return;
      driver_bos[i++] = buf->buffer->driver_bo;
   }
}
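
/* A sketch of how a driver might gather the aux-map BOs at submit time
 * (illustrative only; how each driver_bo is then referenced in the batch is
 * driver-specific):
 *
 *    uint32_t n = intel_aux_map_get_num_buffers(ctx);
 *    void **bos = malloc(n * sizeof(*bos));
 *    intel_aux_map_fill_bos(ctx, bos, n);
 *    for (uint32_t i = 0; i < n; i++)
 *       ... add bos[i] to the execbuf/residency list ...
 *    free(bos);
 */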