/*
 * Copyright (c) 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * The aux map provides a multi-level lookup of the main surface address which
 * ends up providing information about the auxiliary surface data, including
 * the address where the auxiliary data resides.
 *
 * The sections below depict the address splitting and the table-entry formats
 * of the TGL platform. These may vary on other platforms.
 *
 * The 48-bit VMA (GPU) address of the main surface is split to do the address
 * lookup:
 *
 *  48 bit address of main surface
 * +--------+--------+--------+------+
 * | 47:36  | 35:24  | 23:16  | 15:0 |
 * | L3-idx | L2-idx | L1-idx | ...  |
 * +--------+--------+--------+------+
 *
 * The GFX_AUX_TABLE_BASE_ADDR points to a buffer. The L3 Table Entry is
 * located by indexing into this buffer as a uint64_t array using the L3-idx
 * value. The 64-bit L3 entry is defined as:
 *
 * +-------+-------------+------+---+
 * | 63:48 | 47:15       | 14:1 | 0 |
 * |  ...  | L2-tbl-addr | ...  | V |
 * +-------+-------------+------+---+
 *
 * If the `V` (valid) bit is set, then the L2-tbl-addr gives the address for
 * the level-2 table entries, with the lower address bits filled with zero.
 * The L2 Table Entry is located by indexing into this buffer as a uint64_t
 * array using the L2-idx value. The 64-bit L2 entry is similar to the L3
 * entry, except with 2 additional address bits:
 *
 * +-------+-------------+------+---+
 * | 63:48 | 47:13       | 12:1 | 0 |
 * |  ...  | L1-tbl-addr | ...  | V |
 * +-------+-------------+------+---+
 *
 * If the `V` bit is set, then the L1-tbl-addr gives the address for the
 * level-1 table entries, with the lower address bits filled with zero. The L1
 * Table Entry is located by indexing into this buffer as a uint64_t array
 * using the L1-idx value. The 64-bit L1 entry is defined as:
 *
 * +--------+------+-------+-------+-------+---------------+-----+---+
 * | 63:58  | 57   | 56:54 | 53:52 | 51:48 | 47:8          | 7:1 | 0 |
 * | Format | Y/Cr | Depth |  TM   |  ...  | aux-data-addr | ... | V |
 * +--------+------+-------+-------+-------+---------------+-----+---+
 *
 * Where:
 *  - Format: See `isl_format_get_aux_map_encoding`
 *  - Y/Cr: 0=Y(Luma), 1=Cr(Chroma)
 *  - (bit) Depth: See `get_bpp_encoding`
 *  - TM (Tile-mode): 0=Ys, 1=Y, 2=rsvd, 3=rsvd
 *  - aux-data-addr: VMA/GPU address for the aux-data
 *  - V: entry is valid
 *
 * BSpec 44930
 */
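
/* A minimal illustrative sketch (hypothetical helper, not used by this file)
 * of the TGL address split depicted above; get_aux_entry() below performs
 * the same extraction inline:
 *
 *    static inline void
 *    example_split_tgl_address(uint64_t main_address, uint32_t *l3_idx,
 *                              uint32_t *l2_idx, uint32_t *l1_idx)
 *    {
 *       *l3_idx = (main_address >> 36) & 0xfff;  // bits 47:36, 4096 entries
 *       *l2_idx = (main_address >> 24) & 0xfff;  // bits 35:24, 4096 entries
 *       *l1_idx = (main_address >> 16) & 0xff;   // bits 23:16, 256 entries
 *    }
 */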

#include "intel_aux_map.h"
#include "intel_gem.h"

#include "dev/intel_device_info.h"
#include "isl/isl.h"

#include "util/list.h"
#include "util/ralloc.h"
#include "util/u_atomic.h"
#include "util/u_math.h"

#include <inttypes.h>
#include <stdlib.h>
#include <stdio.h>
#include <pthread.h>

#define INTEL_AUX_MAP_FORMAT_BITS_MASK   0xfff0000000000000ull

/* Mask with the first 48 bits set */
#define VALID_ADDRESS_MASK ((1ull << 48) - 1)

#define L3_ENTRY_L2_ADDR_MASK 0xffffffff8000ull

#define L3_L2_BITS_PER_LEVEL 12
#define L3_L2_SUB_TABLE_LEN (sizeof(uint64_t) * (1ull << L3_L2_BITS_PER_LEVEL))
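
/* With 12 index bits per level, each L3/L2 sub-table holds 4096 64-bit
 * entries, i.e. 8 * 4096 = 32KB per table.
 */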

static const bool aux_map_debug = false;

/**
 * Auxiliary surface mapping formats
 *
 * Several AUX mapping formats exist. The supported formats are designated
 * here by generation and granularity. A device can support more than one
 * format, depending on the hardware, but we expect only one of them to be
 * needed for a given device; otherwise this enum would need to become a
 * bit mask.
 */
enum intel_aux_map_format {
   /**
    * 64KB granularity format on GFX12 devices
    */
   INTEL_AUX_MAP_GFX12_64KB = 0,

   /**
    * 1MB granularity format on GFX125 devices
    */
   INTEL_AUX_MAP_GFX125_1MB,

   INTEL_AUX_MAP_LAST,
};

/**
 * An incomplete description of AUX mapping formats
 *
 * Theoretically, many things can differ depending on the hardware design:
 * the number of page-table levels, the address splitting, the format bits,
 * etc. For now we only track the known deltas, to keep the implementation
 * simple.
 */
struct aux_format_info {
   /**
    * Granularity of the main surface in compression. Must be a power of 2.
    */
   uint64_t main_page_size;
   /**
    * Page size of the level-1 page table. Must be a power of 2.
    */
   uint64_t l1_page_size;
   /**
    * Mask of the index bits of the level-1 page table in address splitting.
    */
   uint64_t l1_index_mask;
   /**
    * Offset of the index bits of the level-1 page table in address splitting.
    */
   uint64_t l1_index_offset;
};

static const struct aux_format_info aux_formats[] = {
   [INTEL_AUX_MAP_GFX12_64KB] = {
      .main_page_size = 64 * 1024,
      .l1_page_size = 8 * 1024,
      .l1_index_mask = 0xff,
      .l1_index_offset = 16,
   },
   [INTEL_AUX_MAP_GFX125_1MB] = {
      .main_page_size = 1024 * 1024,
      .l1_page_size = 2 * 1024,
      .l1_index_mask = 0xf,
      .l1_index_offset = 20,
   },
};
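
/* Example of how these parameters fit together: with the GFX12 format, one
 * L1 entry covers a 64KB main page and L1 tables are indexed by main-address
 * bits 23:16 (mask 0xff), so one L1 table spans 256 * 64KB = 16MB of main
 * address space. With the GFX125 format, an entry covers 1MB and bits 23:20
 * (mask 0xf) index the table, so 16 * 1MB = 16MB as well -- in both cases a
 * single L2 entry covers the 16MB addressed by bits 35:24.
 */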

struct aux_map_buffer {
   struct list_head link;
   struct intel_buffer *buffer;
};

struct intel_aux_level {
   /* GPU address of the current level */
   uint64_t address;

   /* Pointer to the GPU entries of this level */
   uint64_t *entries;

   union {
      /* Host-side tracking of a parent level's children (only used on the
       * L3/L2 levels, which have 4096 entries)
       */
      struct intel_aux_level *children[4096];

      /* Refcounts of AUX pages at the L1 level (MTL has only 16 entries
       * in L1, while Gfx12 has 256 entries)
       */
      uint32_t ref_counts[256];
   };
};

struct intel_aux_map_context {
   void *driver_ctx;
   pthread_mutex_t mutex;
   struct intel_aux_level *l3_level;
   struct intel_mapped_pinned_buffer_alloc *buffer_alloc;
   uint32_t num_buffers;
   struct list_head buffers;
   uint32_t tail_offset, tail_remaining;
   uint32_t state_num;
   const struct aux_format_info *format;
};

static inline uint64_t
get_page_mask(const uint64_t page_size)
{
   return page_size - 1;
}

static inline uint64_t
get_meta_page_size(const struct aux_format_info *info)
{
   return info->main_page_size / INTEL_AUX_MAP_MAIN_SIZE_SCALEDOWN;
}

static inline uint64_t
get_index(const uint64_t main_address,
          const uint64_t index_mask, const uint64_t index_offset)
{
   return (main_address >> index_offset) & index_mask;
}

uint64_t
intel_aux_get_meta_address_mask(struct intel_aux_map_context *ctx)
{
   return (~get_page_mask(get_meta_page_size(ctx->format))) & VALID_ADDRESS_MASK;
}

uint64_t
intel_aux_main_to_aux_offset(struct intel_aux_map_context *ctx,
                             uint64_t main_offset)
{
   return main_offset / INTEL_AUX_MAP_MAIN_SIZE_SCALEDOWN;
}
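
/* INTEL_AUX_MAP_MAIN_SIZE_SCALEDOWN (defined in intel_aux_map.h) is the
 * main-to-aux size ratio: one aux byte tracks 256 main-surface bytes, which
 * is consistent with the 256-byte aux-address alignment asserted in
 * add_mapping() below. For example, a GFX12 64KB main page is backed by a
 * 64KB / 256 = 256-byte meta page.
 */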

static const struct aux_format_info *
get_format(enum intel_aux_map_format format)
{
   assert(format < INTEL_AUX_MAP_LAST);
   assert(ARRAY_SIZE(aux_formats) == INTEL_AUX_MAP_LAST);
   return &aux_formats[format];
}

static enum intel_aux_map_format
select_format(const struct intel_device_info *devinfo)
{
   if (devinfo->verx10 >= 125)
      return INTEL_AUX_MAP_GFX125_1MB;
   else if (devinfo->verx10 == 120)
      return INTEL_AUX_MAP_GFX12_64KB;
   else
      return INTEL_AUX_MAP_LAST;
}

static bool
add_buffer(struct intel_aux_map_context *ctx)
{
   struct aux_map_buffer *buf = rzalloc(ctx, struct aux_map_buffer);
   if (!buf)
      return false;

   const uint32_t size = 0x100000;
   buf->buffer = ctx->buffer_alloc->alloc(ctx->driver_ctx, size);
   if (!buf->buffer) {
      ralloc_free(buf);
      return false;
   }

   assert(buf->buffer->map != NULL);

   list_addtail(&buf->link, &ctx->buffers);
   ctx->tail_offset = 0;
   ctx->tail_remaining = size;
   p_atomic_inc(&ctx->num_buffers);

   return true;
}

static void
advance_current_pos(struct intel_aux_map_context *ctx, uint32_t size)
{
   assert(ctx->tail_remaining >= size);
   ctx->tail_remaining -= size;
   ctx->tail_offset += size;
}

static bool
align_and_verify_space(struct intel_aux_map_context *ctx, uint32_t size,
                       uint32_t alignment)
{
   if (ctx->tail_remaining < size)
      return false;

   struct aux_map_buffer *tail =
      list_last_entry(&ctx->buffers, struct aux_map_buffer, link);
   uint64_t gpu = tail->buffer->gpu + ctx->tail_offset;
   uint64_t aligned = align64(gpu, alignment);

   if ((aligned - gpu) + size > ctx->tail_remaining) {
      return false;
   } else {
      if (aligned - gpu > 0)
         advance_current_pos(ctx, aligned - gpu);
      return true;
   }
}
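
/* A worked example of the alignment check above (the values are
 * hypothetical): if the tail buffer's next free GPU address is 0x10009000
 * with 0x7000 bytes remaining and we need a 0x2000-byte table aligned to
 * 0x8000, then aligned = 0x10010000, and the 0x7000-byte skip plus 0x2000
 * exceeds what remains, so the caller must add a fresh 1MB buffer before
 * the table fits.
 */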

static void
get_current_pos(struct intel_aux_map_context *ctx, uint64_t *gpu, uint64_t **map)
{
   assert(!list_is_empty(&ctx->buffers));
   struct aux_map_buffer *tail =
      list_last_entry(&ctx->buffers, struct aux_map_buffer, link);
   if (gpu)
      *gpu = tail->buffer->gpu + ctx->tail_offset;
   if (map)
      *map = (uint64_t*)((uint8_t*)tail->buffer->map + ctx->tail_offset);
}

static struct intel_aux_level *
add_sub_table(struct intel_aux_map_context *ctx,
              struct intel_aux_level *parent,
              uint32_t parent_index,
              uint32_t size, uint32_t align)
{
   if (!align_and_verify_space(ctx, size, align)) {
      if (!add_buffer(ctx))
         return NULL;
      UNUSED bool aligned = align_and_verify_space(ctx, size, align);
      assert(aligned);
   }

   struct intel_aux_level *level = rzalloc(ctx, struct intel_aux_level);

   get_current_pos(ctx, &level->address, &level->entries);
   memset(level->entries, 0, size);
   advance_current_pos(ctx, size);

   if (parent != NULL) {
      assert(parent->children[parent_index] == NULL);
      parent->children[parent_index] = level;
   }

   return level;
}

uint32_t
intel_aux_map_get_state_num(struct intel_aux_map_context *ctx)
{
   return p_atomic_read(&ctx->state_num);
}

struct intel_aux_map_context *
intel_aux_map_init(void *driver_ctx,
                   struct intel_mapped_pinned_buffer_alloc *buffer_alloc,
                   const struct intel_device_info *devinfo)
{
   struct intel_aux_map_context *ctx;

   enum intel_aux_map_format format = select_format(devinfo);
   if (format == INTEL_AUX_MAP_LAST)
      return NULL;

   ctx = ralloc(NULL, struct intel_aux_map_context);
   if (!ctx)
      return NULL;

   if (pthread_mutex_init(&ctx->mutex, NULL)) {
      ralloc_free(ctx);
      return NULL;
   }

   ctx->format = get_format(format);
   ctx->driver_ctx = driver_ctx;
   ctx->buffer_alloc = buffer_alloc;
   ctx->num_buffers = 0;
   list_inithead(&ctx->buffers);
   ctx->tail_offset = 0;
   ctx->tail_remaining = 0;
   ctx->state_num = 0;

   ctx->l3_level = add_sub_table(ctx, NULL, 0,
                                 L3_L2_SUB_TABLE_LEN, L3_L2_SUB_TABLE_LEN);
   if (ctx->l3_level != NULL) {
      if (aux_map_debug)
         fprintf(stderr, "AUX-MAP L3: 0x%"PRIx64", map=%p\n",
                 ctx->l3_level->address, ctx->l3_level->entries);
      p_atomic_inc(&ctx->state_num);
      return ctx;
   } else {
      ralloc_free(ctx);
      return NULL;
   }
}

void
intel_aux_map_finish(struct intel_aux_map_context *ctx)
{
   if (!ctx)
      return;

   pthread_mutex_destroy(&ctx->mutex);
   list_for_each_entry_safe(struct aux_map_buffer, buf, &ctx->buffers, link) {
      ctx->buffer_alloc->free(ctx->driver_ctx, buf->buffer);
      list_del(&buf->link);
      p_atomic_dec(&ctx->num_buffers);
      ralloc_free(buf);
   }

   ralloc_free(ctx);
}

uint32_t
intel_aux_map_get_alignment(struct intel_aux_map_context *ctx)
{
   return ctx->format->main_page_size;
}

uint64_t
intel_aux_map_get_base(struct intel_aux_map_context *ctx)
{
   /**
    * This gets initialized in intel_aux_map_init, and never changes, so
    * there is no need to lock the mutex.
    */
   return ctx->l3_level->address;
}

static uint8_t
get_bpp_encoding(enum isl_format format)
{
   if (isl_format_is_yuv(format)) {
      switch (format) {
      case ISL_FORMAT_YCRCB_NORMAL:
      case ISL_FORMAT_YCRCB_SWAPY:
      case ISL_FORMAT_PLANAR_420_8: return 3;
      case ISL_FORMAT_PLANAR_420_12: return 2;
      case ISL_FORMAT_PLANAR_420_10: return 1;
      case ISL_FORMAT_PLANAR_420_16: return 0;
      default:
         unreachable("Unsupported format!");
         return 0;
      }
   } else {
      switch (isl_format_get_layout(format)->bpb) {
      case 16:  return 0;
      case 8:   return 4;
      case 32:  return 5;
      case 64:  return 6;
      case 128: return 7;
      default:
         unreachable("Unsupported bpp!");
         return 0;
      }
   }
}
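
/* For instance, a 32bpp format such as ISL_FORMAT_R8G8B8A8_UNORM gets bit
 * depth encoding 5, which intel_aux_map_format_bits() below shifts into
 * bits 56:54 of the L1 entry.
 */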

#define INTEL_AUX_MAP_ENTRY_Ys_TILED_BIT  (0x0ull << 52)
#define INTEL_AUX_MAP_ENTRY_Y_TILED_BIT   (0x1ull << 52)

uint64_t
intel_aux_map_format_bits(enum isl_tiling tiling, enum isl_format format,
                          uint8_t plane)
{
   /* gfx12.5+ uses tile-4 rather than y-tiling, and gfx12.5+ also uses
    * compression info from the surface state and ignores the aux-map format
    * bits metadata.
    */
   if (!isl_tiling_is_any_y(tiling))
      return 0;

   if (aux_map_debug)
      fprintf(stderr, "AUX-MAP entry %s, bpp_enc=%d\n",
              isl_format_get_name(format),
              isl_format_get_aux_map_encoding(format));

   assert(tiling == ISL_TILING_ICL_Ys ||
          tiling == ISL_TILING_ICL_Yf ||
          tiling == ISL_TILING_Y0);

   uint64_t format_bits =
      ((uint64_t)isl_format_get_aux_map_encoding(format) << 58) |
      ((uint64_t)(plane > 0) << 57) |
      ((uint64_t)get_bpp_encoding(format) << 54) |
      /* TODO: We assume that Yf is not Tiled-Ys, but waiting on
       *       clarification
       */
      (tiling == ISL_TILING_ICL_Ys ? INTEL_AUX_MAP_ENTRY_Ys_TILED_BIT :
                                     INTEL_AUX_MAP_ENTRY_Y_TILED_BIT);

   assert((format_bits & INTEL_AUX_MAP_FORMAT_BITS_MASK) == format_bits);

   return format_bits;
}
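
/* The composition above mirrors the L1 entry layout from the header comment:
 * format encoding in bits 63:58, Y/Cr plane select in bit 57, bit depth in
 * bits 56:54, and tile-mode in bits 53:52, which together stay within
 * INTEL_AUX_MAP_FORMAT_BITS_MASK (bits 63:52).
 */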

uint64_t
intel_aux_map_format_bits_for_isl_surf(const struct isl_surf *isl_surf)
{
   assert(!isl_format_is_planar(isl_surf->format));
   return intel_aux_map_format_bits(isl_surf->tiling, isl_surf->format, 0);
}

static uint64_t
get_l1_addr_mask(struct intel_aux_map_context *ctx)
{
   uint64_t l1_addr = ~get_page_mask(ctx->format->l1_page_size);
   return l1_addr & VALID_ADDRESS_MASK;
}

static void
get_aux_entry(struct intel_aux_map_context *ctx, uint64_t main_address,
              uint32_t *l1_index_out, uint64_t *l1_entry_addr_out,
              uint64_t **l1_entry_map_out,
              struct intel_aux_level **l1_aux_level_out)
{
   struct intel_aux_level *l3_level = ctx->l3_level;
   struct intel_aux_level *l2_level;
   struct intel_aux_level *l1_level;

   uint32_t l3_index = (main_address >> 36) & 0xfff;

   if (l3_level->children[l3_index] == NULL) {
      l2_level =
         add_sub_table(ctx, ctx->l3_level, l3_index,
                       L3_L2_SUB_TABLE_LEN, L3_L2_SUB_TABLE_LEN);
      if (l2_level != NULL) {
         if (aux_map_debug)
            fprintf(stderr, "AUX-MAP L3[0x%x]: 0x%"PRIx64", map=%p\n",
                    l3_index, l2_level->address, l2_level->entries);
      } else {
         unreachable("Failed to add L2 Aux-Map Page Table!");
      }
      l3_level->entries[l3_index] = (l2_level->address & L3_ENTRY_L2_ADDR_MASK) |
                                    INTEL_AUX_MAP_ENTRY_VALID_BIT;
   } else {
      l2_level = l3_level->children[l3_index];
   }
   uint32_t l2_index = (main_address >> 24) & 0xfff;
   uint64_t l1_page_size = ctx->format->l1_page_size;
   if (l2_level->children[l2_index] == NULL) {
      l1_level = add_sub_table(ctx, l2_level, l2_index, l1_page_size, l1_page_size);
      if (l1_level != NULL) {
         if (aux_map_debug)
            fprintf(stderr, "AUX-MAP L2[0x%x]: 0x%"PRIx64", map=%p\n",
                    l2_index, l1_level->address, l1_level->entries);
      } else {
         unreachable("Failed to add L1 Aux-Map Page Table!");
      }
      l2_level->entries[l2_index] = (l1_level->address & get_l1_addr_mask(ctx)) |
                                    INTEL_AUX_MAP_ENTRY_VALID_BIT;
   } else {
      l1_level = l2_level->children[l2_index];
   }
   uint32_t l1_index = get_index(main_address, ctx->format->l1_index_mask,
                                 ctx->format->l1_index_offset);
   if (l1_index_out)
      *l1_index_out = l1_index;
   if (l1_entry_addr_out)
      *l1_entry_addr_out = intel_canonical_address(l1_level->address + l1_index * sizeof(uint64_t));
   if (l1_entry_map_out)
      *l1_entry_map_out = &l1_level->entries[l1_index];
   if (l1_aux_level_out)
      *l1_aux_level_out = l1_level;
}
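
/* Worked example of the walk above for a hypothetical GFX12 main address
 * 0x123456780000: l3_index = (addr >> 36) & 0xfff = 0x123, l2_index =
 * (addr >> 24) & 0xfff = 0x456, and with l1_index_mask = 0xff and
 * l1_index_offset = 16, l1_index = (addr >> 16) & 0xff = 0x78. Missing
 * L2/L1 tables are allocated on demand and linked into their parents.
 */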

static bool
add_mapping(struct intel_aux_map_context *ctx, uint64_t main_address,
            uint64_t aux_address, uint64_t format_bits,
            bool *state_changed)
{
   if (aux_map_debug)
      fprintf(stderr, "AUX-MAP 0x%"PRIx64" => 0x%"PRIx64"\n", main_address,
              aux_address);

   uint32_t l1_index;
   uint64_t *l1_entry;
   struct intel_aux_level *l1_aux_level;
   get_aux_entry(ctx, main_address, &l1_index, NULL, &l1_entry, &l1_aux_level);

   const uint64_t l1_data =
      (aux_address & intel_aux_get_meta_address_mask(ctx)) |
      format_bits |
      INTEL_AUX_MAP_ENTRY_VALID_BIT;

   const uint64_t current_l1_data = *l1_entry;
   if ((current_l1_data & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) {
      assert(l1_aux_level->ref_counts[l1_index] == 0);
      assert((aux_address & 0xffULL) == 0);
      if (aux_map_debug)
         fprintf(stderr, "AUX-MAP L1[0x%x] 0x%"PRIx64" -> 0x%"PRIx64"\n",
                 l1_index, current_l1_data, l1_data);
      /**
       * We use non-zero bits in 63:1 to indicate the entry had been filled
       * previously. If these bits are non-zero and they don't exactly match
       * what we want to program into the entry, then we must force the
       * aux-map tables to be flushed.
       */
      if (current_l1_data != 0 &&
          (current_l1_data | INTEL_AUX_MAP_ENTRY_VALID_BIT) != l1_data)
         *state_changed = true;
      *l1_entry = l1_data;
   } else {
      if (aux_map_debug)
         fprintf(stderr, "AUX-MAP L1[0x%x] is already marked valid!\n",
                 l1_index);

      if (*l1_entry != l1_data) {
         if (aux_map_debug)
            fprintf(stderr,
                    "AUX-MAP L1[0x%x] overwrite 0x%"PRIx64" != 0x%"PRIx64"\n",
                    l1_index, current_l1_data, l1_data);

         return false;
      }
   }

   l1_aux_level->ref_counts[l1_index]++;

   return true;
}
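
/* Usage note: entries are ref-counted per L1 slot, so mapping the same main
 * page twice with identical l1_data simply bumps the refcount, and the entry
 * is only invalidated once remove_l1_mapping_locked() below drops the count
 * back to zero (or the caller explicitly resets it).
 */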

uint64_t *
intel_aux_map_get_entry(struct intel_aux_map_context *ctx,
                        uint64_t main_address,
                        uint64_t *aux_entry_address)
{
   pthread_mutex_lock(&ctx->mutex);
   uint64_t *l1_entry_map;
   get_aux_entry(ctx, main_address, NULL, aux_entry_address, &l1_entry_map, NULL);
   pthread_mutex_unlock(&ctx->mutex);

   return l1_entry_map;
}

/**
 * We mark the leaf entry as invalid, but we don't attempt to clean up the
 * other levels of translation mappings. Since we attempt to re-use VMA
 * ranges, hopefully this will not lead to unbounded growth of the
 * translation tables.
 */
static void
remove_l1_mapping_locked(struct intel_aux_map_context *ctx, uint64_t main_address,
                         bool reset_refcount, bool *state_changed)
{
   uint32_t l1_index;
   uint64_t *l1_entry;
   struct intel_aux_level *l1_aux_level;
   get_aux_entry(ctx, main_address, &l1_index, NULL, &l1_entry, &l1_aux_level);

   const uint64_t current_l1_data = *l1_entry;
   const uint64_t l1_data = current_l1_data & ~INTEL_AUX_MAP_ENTRY_VALID_BIT;

   if ((current_l1_data & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) {
      assert(l1_aux_level->ref_counts[l1_index] == 0);
      return;
   } else if (reset_refcount) {
      l1_aux_level->ref_counts[l1_index] = 0;
      if (unlikely(l1_data == 0))
         *state_changed = true;
      *l1_entry = l1_data;
   } else {
      assert(l1_aux_level->ref_counts[l1_index] > 0);
      if (--l1_aux_level->ref_counts[l1_index] == 0) {
         /**
          * We use non-zero bits in 63:1 to indicate the entry had been filled
          * previously. In the unlikely event that these are all zero, we
          * force a flush of the aux-map tables.
          */
         if (unlikely(l1_data == 0))
            *state_changed = true;
         *l1_entry = l1_data;
      }
   }
}

static void
remove_mapping_locked(struct intel_aux_map_context *ctx, uint64_t main_address,
                      uint64_t size, bool reset_refcount, bool *state_changed)
{
   if (aux_map_debug)
      fprintf(stderr, "AUX-MAP remove 0x%"PRIx64"-0x%"PRIx64"\n", main_address,
              main_address + size);

   uint64_t main_inc_addr = main_address;
   uint64_t main_page_size = ctx->format->main_page_size;
   assert((main_address & get_page_mask(main_page_size)) == 0);
   while (main_inc_addr - main_address < size) {
      remove_l1_mapping_locked(ctx, main_inc_addr, reset_refcount,
                               state_changed);
      main_inc_addr += main_page_size;
   }
}

bool
intel_aux_map_add_mapping(struct intel_aux_map_context *ctx, uint64_t main_address,
                          uint64_t aux_address, uint64_t main_size_B,
                          uint64_t format_bits)
{
   bool state_changed = false;
   pthread_mutex_lock(&ctx->mutex);
   uint64_t main_inc_addr = main_address;
   uint64_t aux_inc_addr = aux_address;
   const uint64_t main_page_size = ctx->format->main_page_size;
   assert((main_address & get_page_mask(main_page_size)) == 0);
   const uint64_t aux_page_size = get_meta_page_size(ctx->format);
   assert((aux_address & get_page_mask(aux_page_size)) == 0);
   while (main_inc_addr - main_address < main_size_B) {
      if (!add_mapping(ctx, main_inc_addr, aux_inc_addr, format_bits,
                       &state_changed)) {
         break;
      }
      main_inc_addr = main_inc_addr + main_page_size;
      aux_inc_addr = aux_inc_addr + aux_page_size;
   }
   bool success = main_inc_addr - main_address >= main_size_B;
   if (!success && (main_inc_addr > main_address)) {
      /* If the mapping failed, remove the mapped portion. */
      remove_mapping_locked(ctx, main_address,
                            main_inc_addr - main_address,
                            false /* reset_refcount */, &state_changed);
   }
   pthread_mutex_unlock(&ctx->mutex);
   if (state_changed)
      p_atomic_inc(&ctx->state_num);

   return success;
}

void
intel_aux_map_del_mapping(struct intel_aux_map_context *ctx, uint64_t main_address,
                          uint64_t size)
{
   bool state_changed = false;
   pthread_mutex_lock(&ctx->mutex);
   remove_mapping_locked(ctx, main_address, size, false /* reset_refcount */,
                         &state_changed);
   pthread_mutex_unlock(&ctx->mutex);
   if (state_changed)
      p_atomic_inc(&ctx->state_num);
}

void
intel_aux_map_unmap_range(struct intel_aux_map_context *ctx, uint64_t main_address,
                          uint64_t size)
{
   bool state_changed = false;
   pthread_mutex_lock(&ctx->mutex);
   remove_mapping_locked(ctx, main_address, size, true /* reset_refcount */,
                         &state_changed);
   pthread_mutex_unlock(&ctx->mutex);
   if (state_changed)
      p_atomic_inc(&ctx->state_num);
}

uint32_t
intel_aux_map_get_num_buffers(struct intel_aux_map_context *ctx)
{
   return p_atomic_read(&ctx->num_buffers);
}

void
intel_aux_map_fill_bos(struct intel_aux_map_context *ctx, void **driver_bos,
                       uint32_t max_bos)
{
   assert(p_atomic_read(&ctx->num_buffers) >= max_bos);
   uint32_t i = 0;
   list_for_each_entry(struct aux_map_buffer, buf, &ctx->buffers, link) {
      if (i >= max_bos)
         return;
      driver_bos[i++] = buf->buffer->driver_bo;
   }
}
782