1 /*
2  * Copyright 2010 Red Hat Inc.
3  * Copyright 2014-2017 Broadcom
4  * Copyright 2019-2020 Collabora, Ltd.
5  * Copyright 2006 VMware, Inc.
6  * SPDX-License-Identifier: MIT
7  */
8 #include <errno.h>
9 #include <stdio.h>
10 #include <xf86drm.h>
11 #include "asahi/compiler/agx_compile.h"
12 #include "asahi/layout/layout.h"
13 #include "asahi/lib/decode.h"
14 #include "asahi/lib/unstable_asahi_drm.h"
15 #include "drm-uapi/drm_fourcc.h"
16 #include "frontend/winsys_handle.h"
17 #include "gallium/auxiliary/renderonly/renderonly.h"
18 #include "gallium/auxiliary/util/u_debug_cb.h"
19 #include "gallium/auxiliary/util/u_framebuffer.h"
20 #include "gallium/auxiliary/util/u_sample_positions.h"
21 #include "gallium/auxiliary/util/u_surface.h"
22 #include "gallium/auxiliary/util/u_transfer.h"
23 #include "gallium/auxiliary/util/u_transfer_helper.h"
24 #include "pipe/p_context.h"
25 #include "pipe/p_defines.h"
26 #include "pipe/p_screen.h"
27 #include "pipe/p_state.h"
28 #include "util/bitscan.h"
29 #include "util/format/u_format.h"
30 #include "util/format/u_formats.h"
31 #include "util/half_float.h"
32 #include "util/macros.h"
33 #include "util/simple_mtx.h"
34 #include "util/timespec.h"
35 #include "util/u_drm.h"
36 #include "util/u_gen_mipmap.h"
37 #include "util/u_helpers.h"
38 #include "util/u_inlines.h"
39 #include "util/u_memory.h"
40 #include "util/u_process.h"
41 #include "util/u_resource.h"
42 #include "util/u_screen.h"
43 #include "util/u_upload_mgr.h"
44 #include "util/xmlconfig.h"
45 #include "agx_bg_eot.h"
46 #include "agx_device.h"
47 #include "agx_disk_cache.h"
48 #include "agx_fence.h"
49 #include "agx_helpers.h"
50 #include "agx_pack.h"
51 #include "agx_public.h"
52 #include "agx_state.h"
53 #include "agx_tilebuffer.h"
54 #include "shader_enums.h"
55 
56 /* Fake values, pending UAPI upstreaming */
57 #ifndef DRM_FORMAT_MOD_APPLE_TWIDDLED
58 #define DRM_FORMAT_MOD_APPLE_TWIDDLED (2)
59 #endif
60 #ifndef DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED
61 #define DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED (3)
62 #endif
63 
64 uint64_t agx_best_modifiers[] = {
65    DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED,
66    DRM_FORMAT_MOD_APPLE_TWIDDLED,
67    DRM_FORMAT_MOD_LINEAR,
68 };
69 
70 /* These limits are arbitrarily chosen and subject to change as
71  * we discover more workloads with heavy shadowing.
72  *
73  * Maximum size of a shadowed object in bytes.
74  * Hint: 1024x1024xRGBA8 = 4 MiB. Go higher for compression.
75  */
76 #define MAX_SHADOW_BYTES (6 * 1024 * 1024)
77 
78 /* Maximum cumulative size to shadow an object before we flush.
79  * Allows shadowing a 4MiB + meta object 8 times with the logic
80  * below (+1 shadow offset implied).
81  */
82 #define MAX_TOTAL_SHADOW_BYTES (32 * 1024 * 1024)
83 
84 void agx_init_state_functions(struct pipe_context *ctx);
85 
86 /*
87  * resource
88  */
89 
90 static enum ail_tiling
91 ail_modifier_to_tiling(uint64_t modifier)
92 {
93    switch (modifier) {
94    case DRM_FORMAT_MOD_LINEAR:
95       return AIL_TILING_LINEAR;
96    case DRM_FORMAT_MOD_APPLE_TWIDDLED:
97       return AIL_TILING_TWIDDLED;
98    case DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED:
99       return AIL_TILING_TWIDDLED_COMPRESSED;
100    default:
101       unreachable("Unsupported modifier");
102    }
103 }
104 
105 static const char *s_tiling[] = {
106    [AIL_TILING_LINEAR] = "LINR",
107    [AIL_TILING_TWIDDLED] = "TWID",
108    [AIL_TILING_TWIDDLED_COMPRESSED] = "COMP",
109 };
110 
111 #define rsrc_debug(res, ...)                                                   \
112    do {                                                                        \
113       if (agx_device((res)->base.screen)->debug & AGX_DBG_RESOURCE)            \
114          agx_msg(__VA_ARGS__);                                                 \
115    } while (0)
116 
117 static void
118 agx_resource_debug(struct agx_resource *res, const char *msg)
119 {
120    if (!(agx_device(res->base.screen)->debug & AGX_DBG_RESOURCE))
121       return;
122 
123    int ino = -1;
124    if (res->bo->prime_fd >= 0) {
125       struct stat sb;
126       if (!fstat(res->bo->prime_fd, &sb))
127          ino = sb.st_ino;
128    }
129 
130    agx_msg(
131       "%s%s %dx%dx%d %dL %d/%dM %dS M:%llx %s %s%s S:0x%llx LS:0x%llx CS:0x%llx "
132       "Base=0x%llx Size=0x%llx Meta=0x%llx/0x%llx (%s) %s%s%s%s%s%sfd:%d(%d) @ %p\n",
133       msg ?: "", util_format_short_name(res->base.format), res->base.width0,
134       res->base.height0, res->base.depth0, res->base.array_size,
135       res->base.last_level, res->layout.levels, res->layout.sample_count_sa,
136       (long long)res->modifier, s_tiling[res->layout.tiling],
137       res->layout.mipmapped_z ? "MZ " : "",
138       res->layout.page_aligned_layers ? "PL " : "",
139       (long long)res->layout.linear_stride_B,
140       (long long)res->layout.layer_stride_B,
141       (long long)res->layout.compression_layer_stride_B,
142       (long long)res->bo->va->addr, (long long)res->layout.size_B,
143       res->layout.metadata_offset_B
144          ? ((long long)res->bo->va->addr + res->layout.metadata_offset_B)
145          : 0,
146       (long long)res->layout.metadata_offset_B, res->bo->label,
147       res->bo->flags & AGX_BO_SHARED ? "SH " : "",
148       res->bo->flags & AGX_BO_LOW_VA ? "LO " : "",
149       res->bo->flags & AGX_BO_EXEC ? "EX " : "",
150       res->bo->flags & AGX_BO_WRITEBACK ? "WB " : "",
151       res->bo->flags & AGX_BO_SHAREABLE ? "SA " : "",
152       res->bo->flags & AGX_BO_READONLY ? "RO " : "", res->bo->prime_fd, ino,
153       res);
154 }
155 
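/* Fill out the ail layout for a resource from its pipe_resource template and
 * the modifier chosen for it. The actual miptree offsets and sizes are
 * computed later by ail_make_miptree().
 */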
156 static void
157 agx_resource_setup(struct agx_device *dev, struct agx_resource *nresource)
158 {
159    struct pipe_resource *templ = &nresource->base;
160 
161    nresource->layout = (struct ail_layout){
162       .tiling = ail_modifier_to_tiling(nresource->modifier),
163       .mipmapped_z = templ->target == PIPE_TEXTURE_3D,
164       .format = templ->format,
165       .width_px = templ->width0,
166       .height_px = templ->height0,
167       .depth_px = templ->depth0 * templ->array_size,
168       .sample_count_sa = MAX2(templ->nr_samples, 1),
169       .levels = templ->last_level + 1,
170       .writeable_image = templ->bind & PIPE_BIND_SHADER_IMAGE,
171 
172       /* Ostensibly this should be based on the bind, but Gallium bind flags are
173        * notoriously unreliable. The only cost of setting this excessively is a
174        * bit of extra memory use for layered textures, which isn't worth trying
175        * to optimize.
176        */
177       .renderable = true,
178    };
179 }
180 
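/* Import a resource from a winsys (dma-buf) handle. The modifier and stride
 * come from the handle; we validate them against the ail layout and wrap the
 * imported BO rather than allocating a new one.
 */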
181 static struct pipe_resource *
182 agx_resource_from_handle(struct pipe_screen *pscreen,
183                          const struct pipe_resource *templat,
184                          struct winsys_handle *whandle, unsigned usage)
185 {
186    struct agx_device *dev = agx_device(pscreen);
187    struct agx_resource *rsc;
188    struct pipe_resource *prsc;
189 
190    assert(whandle->type == WINSYS_HANDLE_TYPE_FD);
191 
192    rsc = CALLOC_STRUCT(agx_resource);
193    if (!rsc)
194       return NULL;
195 
196    rsc->modifier = whandle->modifier == DRM_FORMAT_MOD_INVALID
197                       ? DRM_FORMAT_MOD_LINEAR
198                       : whandle->modifier;
199 
200    /* We need strides to be aligned. ail asserts this, but we want to fail
201     * gracefully so the app can handle the error.
202     */
203    if (rsc->modifier == DRM_FORMAT_MOD_LINEAR && (whandle->stride % 16) != 0) {
204       FREE(rsc);
205       return NULL;
206    }
207 
208    prsc = &rsc->base;
209 
210    *prsc = *templat;
211 
212    pipe_reference_init(&prsc->reference, 1);
213    prsc->screen = pscreen;
214 
215    rsc->bo = agx_bo_import(dev, whandle->handle);
216    /* Sometimes an import can fail, e.g. due to an invalid buffer fd or
217     * running out of address space to mmap it.
218     */
219    if (!rsc->bo) {
220       FREE(rsc);
221       return NULL;
222    }
223 
224    agx_resource_setup(dev, rsc);
225 
226    if (rsc->layout.tiling == AIL_TILING_LINEAR) {
227       rsc->layout.linear_stride_B = whandle->stride;
228    } else if (whandle->stride != ail_get_wsi_stride_B(&rsc->layout, 0)) {
229       FREE(rsc);
230       return NULL;
231    }
232 
233    assert(whandle->offset == 0);
234 
235    ail_make_miptree(&rsc->layout);
236 
237    if (prsc->target == PIPE_BUFFER) {
238       assert(rsc->layout.tiling == AIL_TILING_LINEAR);
239       util_range_init(&rsc->valid_buffer_range);
240    }
241 
242    agx_resource_debug(rsc, "Import: ");
243 
244    return prsc;
245 }
246 
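/* Export a handle for sharing. KMS handles on render-only setups go through
 * the renderonly scanout object; otherwise we hand out the BO's KMS handle or
 * export a prime FD directly.
 */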
247 static bool
248 agx_resource_get_handle(struct pipe_screen *pscreen, struct pipe_context *ctx,
249                         struct pipe_resource *pt, struct winsys_handle *handle,
250                         unsigned usage)
251 {
252    struct agx_device *dev = agx_device(pscreen);
253    struct pipe_resource *cur = pt;
254 
255    /* Even though asahi doesn't support multi-planar formats, we
256     * can get here through GBM, which does. Walk the list of planes
257     * to find the right one.
258     */
259    for (int i = 0; i < handle->plane; i++) {
260       cur = cur->next;
261       if (!cur)
262          return false;
263    }
264 
265    struct agx_resource *rsrc = agx_resource(cur);
266 
267    if (handle->type == WINSYS_HANDLE_TYPE_KMS && dev->ro) {
268       rsrc_debug(rsrc, "Get handle: %p (KMS RO)\n", rsrc);
269 
270       if (!rsrc->scanout && dev->ro && (rsrc->base.bind & PIPE_BIND_SCANOUT)) {
271          rsrc->scanout =
272             renderonly_scanout_for_resource(&rsrc->base, dev->ro, NULL);
273       }
274 
275       if (!rsrc->scanout)
276          return false;
277 
278       return renderonly_get_handle(rsrc->scanout, handle);
279    } else if (handle->type == WINSYS_HANDLE_TYPE_KMS) {
280       rsrc_debug(rsrc, "Get handle: %p (KMS)\n", rsrc);
281 
282       handle->handle = rsrc->bo->handle;
283    } else if (handle->type == WINSYS_HANDLE_TYPE_FD) {
284       int fd = agx_bo_export(dev, rsrc->bo);
285 
286       if (fd < 0)
287          return false;
288 
289       handle->handle = fd;
290       if (dev->debug & AGX_DBG_RESOURCE) {
291          struct stat sb;
292          fstat(rsrc->bo->prime_fd, &sb);
293          agx_msg("Get handle: %p (FD %d/%ld)\n", rsrc, fd, (long)sb.st_ino);
294       }
295    } else {
296       /* Other handle types not supported */
297       return false;
298    }
299 
300    handle->stride = ail_get_wsi_stride_B(&rsrc->layout, 0);
301    handle->size = rsrc->layout.size_B;
302    handle->offset = rsrc->layout.level_offsets_B[0];
303    handle->format = rsrc->layout.format;
304    handle->modifier = rsrc->modifier;
305 
306    return true;
307 }
308 
309 static bool
310 agx_resource_get_param(struct pipe_screen *pscreen, struct pipe_context *pctx,
311                        struct pipe_resource *prsc, unsigned plane,
312                        unsigned layer, unsigned level,
313                        enum pipe_resource_param param, unsigned usage,
314                        uint64_t *value)
315 {
316    struct agx_resource *rsrc = (struct agx_resource *)prsc;
317 
318    switch (param) {
319    case PIPE_RESOURCE_PARAM_STRIDE:
320       *value = ail_get_wsi_stride_B(&rsrc->layout, level);
321       return true;
322    case PIPE_RESOURCE_PARAM_OFFSET:
323       *value = rsrc->layout.level_offsets_B[level];
324       return true;
325    case PIPE_RESOURCE_PARAM_MODIFIER:
326       *value = rsrc->modifier;
327       return true;
328    case PIPE_RESOURCE_PARAM_NPLANES:
329       /* We don't support multi-planar formats, but we should still handle
330        * this case for GBM shared resources.
331        */
332       *value = util_resource_num(prsc);
333       return true;
334    default:
335       return false;
336    }
337 }
338 
339 static bool
340 agx_is_2d(enum pipe_texture_target target)
341 {
342    return (target == PIPE_TEXTURE_2D || target == PIPE_TEXTURE_RECT);
343 }
344 
345 static bool
346 agx_linear_allowed(const struct agx_resource *pres)
347 {
348    /* Mipmapping not allowed with linear */
349    if (pres->base.last_level != 0)
350       return false;
351 
352    /* Depth/stencil buffers must not be linear */
353    if (pres->base.bind & PIPE_BIND_DEPTH_STENCIL)
354       return false;
355 
356    /* Multisampling not allowed with linear */
357    if (pres->base.nr_samples > 1)
358       return false;
359 
360    /* Block compression not allowed with linear */
361    if (util_format_is_compressed(pres->base.format))
362       return false;
363 
364    switch (pres->base.target) {
365    /* Buffers are always linear, even with image atomics */
366    case PIPE_BUFFER:
367 
368    /* Linear textures require specifying their strides explicitly, which only
369     * works for 2D textures. Rectangle textures are a special case of 2D.
370     *
371     * 1D textures only exist in GLES and are lowered to 2D to bypass hardware
372     * limitations.
373     *
374     * However, we don't want to support this case in the image atomic
375     * implementation, so linear shader images are specially forbidden.
376     */
377    case PIPE_TEXTURE_1D:
378    case PIPE_TEXTURE_1D_ARRAY:
379    case PIPE_TEXTURE_2D:
380    case PIPE_TEXTURE_2D_ARRAY:
381    case PIPE_TEXTURE_RECT:
382       if (pres->base.bind & PIPE_BIND_SHADER_IMAGE)
383          return false;
384 
385       break;
386 
387    /* No other texture type can specify a stride */
388    default:
389       return false;
390    }
391 
392    return true;
393 }
394 
395 static bool
396 agx_twiddled_allowed(const struct agx_resource *pres)
397 {
398    /* Certain binds force linear */
399    if (pres->base.bind & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_LINEAR))
400       return false;
401 
402    /* Buffers must be linear */
403    if (pres->base.target == PIPE_BUFFER)
404       return false;
405 
406    /* Anything else may be twiddled */
407    return true;
408 }
409 
410 static bool
411 agx_compression_allowed(const struct agx_resource *pres)
412 {
413    /* Allow disabling compression for debugging */
414    if (agx_device(pres->base.screen)->debug & AGX_DBG_NOCOMPRESS) {
415       rsrc_debug(pres, "No compression: disabled\n");
416       return false;
417    }
418 
419    /* Limited to renderable */
420    if (pres->base.bind &
421        ~(PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET |
422          PIPE_BIND_DEPTH_STENCIL | PIPE_BIND_SHARED | PIPE_BIND_SCANOUT)) {
423       rsrc_debug(pres, "No compression: not renderable\n");
424       return false;
425    }
426 
427    if (!ail_can_compress(pres->base.format, pres->base.width0,
428                          pres->base.height0, MAX2(pres->base.nr_samples, 1))) {
429       rsrc_debug(pres, "No compression: incompatible layout\n");
430       return false;
431    }
432 
433    if (pres->base.format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
434       rsrc_debug(pres, "No compression: RGB9E5 copies need work\n");
435       return false;
436    }
437 
438    return true;
439 }
440 
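/* Pick the most preferred modifier allowed for this resource out of a
 * caller-supplied list: compressed twiddled, then twiddled, then linear.
 */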
441 static uint64_t
442 agx_select_modifier_from_list(const struct agx_resource *pres,
443                               const uint64_t *modifiers, int count)
444 {
445    if (agx_twiddled_allowed(pres) && agx_compression_allowed(pres) &&
446        drm_find_modifier(DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED, modifiers,
447                          count))
448       return DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED;
449 
450    if (agx_twiddled_allowed(pres) &&
451        drm_find_modifier(DRM_FORMAT_MOD_APPLE_TWIDDLED, modifiers, count))
452       return DRM_FORMAT_MOD_APPLE_TWIDDLED;
453 
454    if (agx_linear_allowed(pres) &&
455        drm_find_modifier(DRM_FORMAT_MOD_LINEAR, modifiers, count))
456       return DRM_FORMAT_MOD_LINEAR;
457 
458    /* We didn't find anything */
459    return DRM_FORMAT_MOD_INVALID;
460 }
461 
462 static uint64_t
463 agx_select_best_modifier(const struct agx_resource *pres)
464 {
465    /* Prefer linear for staging resources, which should be as fast as possible
466     * to write from the CPU.
467     */
468    if (agx_linear_allowed(pres) && pres->base.usage == PIPE_USAGE_STAGING)
469       return DRM_FORMAT_MOD_LINEAR;
470 
471    /* For SCANOUT or SHARED resources with no explicit modifier selection, force
472     * linear since we cannot expect consumers to correctly pass through the
473     * modifier (unless linear is not allowed at all).
474     */
475    if (agx_linear_allowed(pres) &&
476        pres->base.bind & (PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) {
477       return DRM_FORMAT_MOD_LINEAR;
478    }
479 
480    if (agx_twiddled_allowed(pres)) {
481       if (agx_compression_allowed(pres))
482          return DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED;
483       else
484          return DRM_FORMAT_MOD_APPLE_TWIDDLED;
485    }
486 
487    if (agx_linear_allowed(pres))
488       return DRM_FORMAT_MOD_LINEAR;
489    else
490       return DRM_FORMAT_MOD_INVALID;
491 }
492 
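/* Create a resource: choose a modifier (from the caller's list if one is
 * given, otherwise heuristically), lay out the miptree with ail, and allocate
 * a BO of the resulting size.
 */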
493 static struct pipe_resource *
494 agx_resource_create_with_modifiers(struct pipe_screen *screen,
495                                    const struct pipe_resource *templ,
496                                    const uint64_t *modifiers, int count)
497 {
498    struct agx_device *dev = agx_device(screen);
499    struct agx_resource *nresource;
500 
501    nresource = CALLOC_STRUCT(agx_resource);
502    if (!nresource)
503       return NULL;
504 
505    nresource->base = *templ;
506    nresource->base.screen = screen;
507 
508    if (modifiers) {
509       nresource->modifier =
510          agx_select_modifier_from_list(nresource, modifiers, count);
511    } else {
512       nresource->modifier = agx_select_best_modifier(nresource);
513    }
514 
515    /* There may not be a matching modifier, bail if so */
516    if (nresource->modifier == DRM_FORMAT_MOD_INVALID) {
517       free(nresource);
518       return NULL;
519    }
520 
521    /* If there's only 1 layer and there's no compression, there's no harm in
522     * inferring the shader image flag. Do so to avoid reallocation in case the
523     * resource is later used as an image.
524     */
525    if (nresource->modifier != DRM_FORMAT_MOD_APPLE_TWIDDLED_COMPRESSED &&
526        templ->depth0 == 1) {
527 
528       nresource->base.bind |= PIPE_BIND_SHADER_IMAGE;
529    }
530 
531    nresource->mipmapped = (templ->last_level > 0);
532 
533    assert(templ->format != PIPE_FORMAT_Z24X8_UNORM &&
534           templ->format != PIPE_FORMAT_Z24_UNORM_S8_UINT &&
535           "u_transfer_helper should have lowered");
536 
537    agx_resource_setup(dev, nresource);
538 
539    pipe_reference_init(&nresource->base.reference, 1);
540 
541    ail_make_miptree(&nresource->layout);
542 
543    /* Fail Piglit's obnoxious allocations */
544    if (nresource->layout.size_B >= (1ull << 32)) {
545       free(nresource);
546       return NULL;
547    }
548 
549    if (templ->target == PIPE_BUFFER) {
550       assert(nresource->layout.tiling == AIL_TILING_LINEAR);
551       util_range_init(&nresource->valid_buffer_range);
552    }
553 
554    /* Guess a label based on the bind */
555    unsigned bind = templ->bind;
556 
557    const char *label = (bind & PIPE_BIND_INDEX_BUFFER)     ? "Index buffer"
558                        : (bind & PIPE_BIND_SCANOUT)        ? "Scanout"
559                        : (bind & PIPE_BIND_DISPLAY_TARGET) ? "Display target"
560                        : (bind & PIPE_BIND_SHARED)         ? "Shared resource"
561                        : (bind & PIPE_BIND_RENDER_TARGET)  ? "Render target"
562                        : (bind & PIPE_BIND_DEPTH_STENCIL)
563                           ? "Depth/stencil buffer"
564                        : (bind & PIPE_BIND_SAMPLER_VIEW)    ? "Texture"
565                        : (bind & PIPE_BIND_VERTEX_BUFFER)   ? "Vertex buffer"
566                        : (bind & PIPE_BIND_CONSTANT_BUFFER) ? "Constant buffer"
567                        : (bind & PIPE_BIND_GLOBAL)          ? "Global memory"
568                        : (bind & PIPE_BIND_SHADER_BUFFER)   ? "Shader buffer"
569                        : (bind & PIPE_BIND_SHADER_IMAGE)    ? "Shader image"
570                                                             : "Other resource";
571 
572    uint32_t create_flags = 0;
573 
574    /* Default to write-combine resources, but use writeback if that is expected
575     * to be beneficial.
576     */
577    if (nresource->base.usage == PIPE_USAGE_STAGING ||
578        (nresource->base.flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
579 
580       create_flags |= AGX_BO_WRITEBACK;
581    }
582 
583    /* Allow disabling write-combine to debug performance issues */
584    if (dev->debug & AGX_DBG_NOWC) {
585       create_flags |= AGX_BO_WRITEBACK;
586    }
587 
588    /* Create buffers that might be shared with the SHAREABLE flag */
589    if (bind & (PIPE_BIND_SCANOUT | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SHARED))
590       create_flags |= AGX_BO_SHAREABLE;
591 
592    nresource->bo =
593       agx_bo_create(dev, nresource->layout.size_B, 0, create_flags, label);
594 
595    if (!nresource->bo) {
596       FREE(nresource);
597       return NULL;
598    }
599 
600    agx_resource_debug(nresource, "New: ");
601    return &nresource->base;
602 }
603 
604 static struct pipe_resource *
605 agx_resource_create(struct pipe_screen *screen,
606                     const struct pipe_resource *templ)
607 {
608    return agx_resource_create_with_modifiers(screen, templ, NULL, 0);
609 }
610 
611 static void
612 agx_resource_destroy(struct pipe_screen *screen, struct pipe_resource *prsrc)
613 {
614    struct agx_resource *rsrc = (struct agx_resource *)prsrc;
615    struct agx_screen *agx_screen = (struct agx_screen *)screen;
616 
617    agx_resource_debug(rsrc, "Destroy: ");
618 
619    if (prsrc->target == PIPE_BUFFER)
620       util_range_destroy(&rsrc->valid_buffer_range);
621 
622    if (rsrc->scanout)
623       renderonly_scanout_destroy(rsrc->scanout, agx_screen->dev.ro);
624 
625    agx_bo_unreference(&agx_screen->dev, rsrc->bo);
626    FREE(rsrc);
627 }
628 
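/* Track an image binding on a batch for hazard tracking: writes mark the
 * batch as having incoherent writes and record the written range (buffers) or
 * level (textures), while reads only record the dependency.
 */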
629 void
630 agx_batch_track_image(struct agx_batch *batch, struct pipe_image_view *image)
631 {
632    struct agx_resource *rsrc = agx_resource(image->resource);
633 
634    if (image->shader_access & PIPE_IMAGE_ACCESS_WRITE) {
635       batch->incoherent_writes = true;
636 
637       if (rsrc->base.target == PIPE_BUFFER) {
638          agx_batch_writes_range(batch, rsrc, image->u.buf.offset,
639                                 image->u.buf.size);
640       } else {
641          agx_batch_writes(batch, rsrc, image->u.tex.level);
642       }
643    } else {
644       agx_batch_reads(batch, rsrc);
645    }
646 }
647 
648 /*
649  * transfer
650  */
651 
652 static void
653 agx_transfer_flush_region(struct pipe_context *pipe,
654                           struct pipe_transfer *transfer,
655                           const struct pipe_box *box)
656 {
657 }
658 
659 /* Reallocate the backing buffer of a resource, returns true if successful */
660 static bool
661 agx_shadow(struct agx_context *ctx, struct agx_resource *rsrc, bool needs_copy)
662 {
663    struct agx_device *dev = agx_device(ctx->base.screen);
664    struct agx_bo *old = rsrc->bo;
665    size_t size = rsrc->layout.size_B;
666    unsigned flags = old->flags;
667 
668    if (dev->debug & AGX_DBG_NOSHADOW)
669       return false;
670 
671    /* If a resource is (or could be) shared, shadowing would desync across
672     * processes. (It's also not what this path is for.)
673     */
674    if (flags & (AGX_BO_SHARED | AGX_BO_SHAREABLE))
675       return false;
676 
677    /* Do not shadow resources that are too large */
678    if (size > MAX_SHADOW_BYTES && needs_copy)
679       return false;
680 
681    /* Do not shadow resources too much */
682    if (rsrc->shadowed_bytes >= MAX_TOTAL_SHADOW_BYTES && needs_copy)
683       return false;
684 
685    rsrc->shadowed_bytes += size;
686 
687    /* If we need to copy, we reallocate the resource with cached-coherent
688     * memory. This is a heuristic: it assumes that if the app needs a shadow
689     * (with a copy) now, it will again need to shadow-and-copy the same resource
690     * in the future. This accelerates the later copies, since otherwise the copy
691     * involves reading uncached memory.
692     */
693    if (needs_copy)
694       flags |= AGX_BO_WRITEBACK;
695 
696    struct agx_bo *new_ = agx_bo_create(dev, size, 0, flags, old->label);
697 
698    /* If allocation failed, we can fall back on a flush gracefully */
699    if (new_ == NULL)
700       return false;
701 
702    if (needs_copy) {
703       perf_debug_ctx(ctx, "Shadowing %zu bytes on the CPU (%s)", size,
704                      (old->flags & AGX_BO_WRITEBACK) ? "cached" : "uncached");
705       agx_resource_debug(rsrc, "Shadowed: ");
706 
707       memcpy(new_->map, old->map, size);
708    }
709 
710    /* Swap the pointers, dropping a reference */
711    agx_bo_unreference(dev, rsrc->bo);
712    rsrc->bo = new_;
713 
714    /* Reemit descriptors using this resource */
715    agx_dirty_all(ctx);
716    return true;
717 }
718 
719 /*
720  * Perform the required synchronization before a transfer_map operation can
721  * complete. This may require syncing batches.
722  */
723 static void
724 agx_prepare_for_map(struct agx_context *ctx, struct agx_resource *rsrc,
725                     unsigned level,
726                     unsigned usage, /* a combination of PIPE_MAP_x */
727                     const struct pipe_box *box, bool staging_blit)
728 {
729    /* GPU access does not require explicit syncs, as the batch tracking logic
730     * will ensure correct ordering automatically.
731     */
732    if (staging_blit)
733       return;
734 
735    /* If the level has not been written, we may freely do CPU access (writes),
736     * even if other levels are being written by the GPU. This lets us write some
737     * mip levels on the CPU and some on the GPU, without stalling.
738     */
739    if (!agx_resource_valid(rsrc, level))
740       return;
741 
742    /* Upgrade DISCARD_RANGE to WHOLE_RESOURCE if the whole resource is
743     * being mapped.
744     */
745    if ((usage & PIPE_MAP_DISCARD_RANGE) &&
746        !(rsrc->base.flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) &&
747        rsrc->base.last_level == 0 &&
748        util_texrange_covers_whole_level(&rsrc->base, 0, box->x, box->y, box->z,
749                                         box->width, box->height, box->depth)) {
750 
751       usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
752    }
753 
754    /* Shadowing doesn't work for separate stencil or shared resources */
755    if (rsrc->separate_stencil || (rsrc->bo->flags & AGX_BO_SHARED))
756       usage &= ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
757 
758    /* If the access is unsynchronized, there's nothing to do */
759    if (usage & PIPE_MAP_UNSYNCHRONIZED)
760       return;
761 
762    /* If the range being accessed is uninitialized, we do not need to sync. */
763    if (rsrc->base.target == PIPE_BUFFER && !(rsrc->bo->flags & AGX_BO_SHARED) &&
764        !util_ranges_intersect(&rsrc->valid_buffer_range, box->x,
765                               box->x + box->width))
766       return;
767 
768    /* Everything after this needs the context, which is not safe for
769     * unsynchronized transfers when we claim
770     * PIPE_CAP_MAP_UNSYNCHRONIZED_THREAD_SAFE.
771     */
772    assert(!(usage & PIPE_MAP_UNSYNCHRONIZED));
773 
774    /* Reading or writing from the CPU requires syncing writers. */
775    agx_sync_writer(ctx, rsrc, "Unsynchronized CPU transfer");
776 
777    /* Additionally, writing needs readers synced. */
778    if (!(usage & PIPE_MAP_WRITE))
779       return;
780 
781    /* If there are no readers, we're done. We check at the start to
782     * avoid expensive shadowing paths or duplicated checks in this happy path.
783     */
784    if (!agx_any_batch_uses_resource(ctx, rsrc)) {
785       rsrc->shadowed_bytes = 0;
786       return;
787    }
788 
789    /* There are readers. Try to invalidate the resource to avoid a sync */
790    if ((usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) &&
791        agx_shadow(ctx, rsrc, false))
792       return;
793 
794    /* Or try to shadow it */
795    if (!(rsrc->base.flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) &&
796        agx_shadow(ctx, rsrc, true))
797       return;
798 
799    /* Otherwise, we need to sync */
800    agx_sync_readers(ctx, rsrc, "Unsynchronized write");
801 
802    rsrc->shadowed_bytes = 0;
803 }
804 
805 /*
806  * Return a colour-renderable format compatible with a depth/stencil format, to
807  * be used as an interchange format for depth/stencil blits. For
808  * non-depth/stencil formats, returns the format itself, except when that format
809  * would not round-trip, in which case we return a compatible format that does.
810  */
811 static enum pipe_format
812 agx_staging_format(enum pipe_format format)
813 {
814    switch (format) {
815    case PIPE_FORMAT_Z16_UNORM:
816       return PIPE_FORMAT_R16_UNORM;
817    case PIPE_FORMAT_Z32_FLOAT:
818       return PIPE_FORMAT_R32_FLOAT;
819    case PIPE_FORMAT_S8_UINT:
820       return PIPE_FORMAT_R8_UINT;
821    default:
822       /* Z24 and combined Z/S are lowered to one of the above formats by
823        * u_transfer_helper. The caller needs to pass in the rsrc->layout.format
824        * and not the rsrc->base.format to get the lowered physical format
825        * (rather than the API logical format).
826        */
827       assert(!util_format_is_depth_or_stencil(format) &&
828              "no other depth/stencil formats allowed for staging");
829 
830       /* However, snorm does not round trip, so don't use that for staging */
831       return util_format_snorm_to_sint(format);
832    }
833 }
834 
835 /* Most of the time we can do CPU-side transfers, but sometimes we need to use
836  * the 3D pipe for this. Let's wrap u_blitter to blit to/from staging textures.
837  * Code adapted from panfrost */
838 
839 static struct agx_resource *
840 agx_alloc_staging(struct pipe_screen *screen, struct agx_resource *rsc,
841                   unsigned level, const struct pipe_box *box)
842 {
843    struct pipe_resource tmpl = rsc->base;
844 
845    tmpl.usage = PIPE_USAGE_STAGING;
846    tmpl.width0 = box->width;
847    tmpl.height0 = box->height;
848    tmpl.depth0 = 1;
849 
850    /* We need a linear staging resource. We have linear 2D arrays, but not
851     * linear 3D or cube textures. So switch to 2D arrays if needed.
852     */
853    switch (tmpl.target) {
854    case PIPE_TEXTURE_2D_ARRAY:
855    case PIPE_TEXTURE_CUBE:
856    case PIPE_TEXTURE_CUBE_ARRAY:
857    case PIPE_TEXTURE_3D:
858       tmpl.target = PIPE_TEXTURE_2D_ARRAY;
859       tmpl.array_size = box->depth;
860       break;
861    default:
862       assert(tmpl.array_size == 1);
863       assert(box->depth == 1);
864       break;
865    }
866 
867    tmpl.last_level = 0;
868 
869    /* Linear is incompatible with depth/stencil, so we convert */
870    tmpl.format = agx_staging_format(rsc->layout.format);
871    tmpl.bind =
872       PIPE_BIND_LINEAR | PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
873 
874    struct pipe_resource *pstaging = screen->resource_create(screen, &tmpl);
875    if (!pstaging)
876       return NULL;
877 
878    return agx_resource(pstaging);
879 }
880 
881 static void
882 agx_blit_from_staging(struct pipe_context *pctx, struct agx_transfer *trans)
883 {
884    struct pipe_resource *dst = trans->base.resource;
885    struct pipe_blit_info blit = {0};
886 
887    blit.dst.resource = dst;
888    blit.dst.format = agx_staging_format(agx_resource(dst)->layout.format);
889    blit.dst.level = trans->base.level;
890    blit.dst.box = trans->base.box;
891    blit.src.resource = trans->staging.rsrc;
892    blit.src.format = blit.dst.format;
893    blit.src.level = 0;
894    blit.src.box = trans->staging.box;
895    blit.mask = util_format_get_mask(blit.src.format);
896    blit.filter = PIPE_TEX_FILTER_NEAREST;
897 
898    agx_blit(pctx, &blit);
899 }
900 
901 static void
902 agx_blit_to_staging(struct pipe_context *pctx, struct agx_transfer *trans)
903 {
904    struct pipe_resource *src = trans->base.resource;
905    struct pipe_blit_info blit = {0};
906 
907    blit.src.resource = src;
908    blit.src.format = agx_staging_format(agx_resource(src)->layout.format);
909    blit.src.level = trans->base.level;
910    blit.src.box = trans->base.box;
911    blit.dst.resource = trans->staging.rsrc;
912    blit.dst.format = blit.src.format;
913    blit.dst.level = 0;
914    blit.dst.box = trans->staging.box;
915    blit.mask = util_format_get_mask(blit.dst.format);
916    blit.filter = PIPE_TEX_FILTER_NEAREST;
917 
918    agx_blit(pctx, &blit);
919 }
920 
921 static void *
922 agx_transfer_map(struct pipe_context *pctx, struct pipe_resource *resource,
923                  unsigned level,
924                  unsigned usage, /* a combination of PIPE_MAP_x */
925                  const struct pipe_box *box,
926                  struct pipe_transfer **out_transfer)
927 {
928    struct agx_context *ctx = agx_context(pctx);
929    struct agx_resource *rsrc = agx_resource(resource);
930    struct agx_device *dev = agx_device(ctx->base.screen);
931 
932    /* Can't map tiled/compressed directly */
933    if ((usage & PIPE_MAP_DIRECTLY) && rsrc->modifier != DRM_FORMAT_MOD_LINEAR)
934       return NULL;
935 
936    /* Can't transfer out of bounds mip levels */
937    if (level >= rsrc->layout.levels)
938       return NULL;
939 
940    /* For compression, we use a staging blit as we do not implement AGX
941     * compression in software. In some cases, we could use this path for
942     * twiddled too, but we don't have a use case for that yet.
943     */
944    bool staging_blit = ail_is_level_compressed(&rsrc->layout, level);
945 
946    agx_prepare_for_map(ctx, rsrc, level, usage, box, staging_blit);
947 
948    /* Track the written buffer range */
949    if (resource->target == PIPE_BUFFER) {
950       /* Note the ordering: DISCARD|WRITE is valid, so clear before adding. */
951       if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE)
952          util_range_set_empty(&rsrc->valid_buffer_range);
953       if (usage & PIPE_MAP_WRITE) {
954          util_range_add(resource, &rsrc->valid_buffer_range, box->x,
955                         box->x + box->width);
956       }
957    }
958 
959    struct agx_transfer *transfer = CALLOC_STRUCT(agx_transfer);
960    transfer->base.level = level;
961    transfer->base.usage = usage;
962    transfer->base.box = *box;
963 
964    pipe_resource_reference(&transfer->base.resource, resource);
965    *out_transfer = &transfer->base;
966 
967    if (staging_blit) {
968       /* Should never happen for buffers, and it's not safe */
969       assert(resource->target != PIPE_BUFFER);
970 
971       struct agx_resource *staging =
972          agx_alloc_staging(pctx->screen, rsrc, level, box);
973       assert(staging);
974 
975       /* Staging resources have one LOD: level 0. Query the strides
976        * on this LOD.
977        */
978       transfer->base.stride = ail_get_linear_stride_B(&staging->layout, 0);
979       transfer->base.layer_stride = staging->layout.layer_stride_B;
980       transfer->staging.rsrc = &staging->base;
981 
982       transfer->staging.box = *box;
983       transfer->staging.box.x = 0;
984       transfer->staging.box.y = 0;
985       transfer->staging.box.z = 0;
986 
987       assert(transfer->staging.rsrc != NULL);
988 
989       if ((usage & PIPE_MAP_READ) && agx_resource_valid(rsrc, level)) {
990          agx_blit_to_staging(pctx, transfer);
991          agx_sync_writer(ctx, staging, "GPU read staging blit");
992       }
993 
994       dev->ops.bo_mmap(dev, staging->bo);
995       return staging->bo->map;
996    }
997 
998    dev->ops.bo_mmap(dev, rsrc->bo);
999 
1000    if (ail_is_level_twiddled_uncompressed(&rsrc->layout, level)) {
1001       /* Should never happen for buffers, and it's not safe */
1002       assert(resource->target != PIPE_BUFFER);
1003 
1004       transfer->base.stride =
1005          util_format_get_stride(rsrc->layout.format, box->width);
1006 
1007       transfer->base.layer_stride = util_format_get_2d_size(
1008          rsrc->layout.format, transfer->base.stride, box->height);
1009 
1010       transfer->map = calloc(transfer->base.layer_stride, box->depth);
1011 
1012       if ((usage & PIPE_MAP_READ) && agx_resource_valid(rsrc, level)) {
1013          for (unsigned z = 0; z < box->depth; ++z) {
1014             uint8_t *map = agx_map_texture_cpu(rsrc, level, box->z + z);
1015             uint8_t *dst =
1016                (uint8_t *)transfer->map + transfer->base.layer_stride * z;
1017 
1018             ail_detile(map, dst, &rsrc->layout, level, transfer->base.stride,
1019                        box->x, box->y, box->width, box->height);
1020          }
1021       }
1022 
1023       return transfer->map;
1024    } else {
1025       assert(rsrc->modifier == DRM_FORMAT_MOD_LINEAR);
1026 
1027       transfer->base.stride = ail_get_linear_stride_B(&rsrc->layout, level);
1028       transfer->base.layer_stride = rsrc->layout.layer_stride_B;
1029 
1030       /* Be conservative for direct writes */
1031       if ((usage & PIPE_MAP_WRITE) &&
1032           (usage &
1033            (PIPE_MAP_DIRECTLY | PIPE_MAP_PERSISTENT | PIPE_MAP_COHERENT))) {
1034          BITSET_SET(rsrc->data_valid, level);
1035       }
1036 
1037       uint32_t offset =
1038          ail_get_linear_pixel_B(&rsrc->layout, level, box->x, box->y, box->z);
1039 
1040       return ((uint8_t *)rsrc->bo->map) + offset;
1041    }
1042 }
1043 
1044 static void
1045 agx_transfer_unmap(struct pipe_context *pctx, struct pipe_transfer *transfer)
1046 {
1047    /* Gallium expects writeback here, so we tile */
1048 
1049    struct agx_transfer *trans = agx_transfer(transfer);
1050    struct pipe_resource *prsrc = transfer->resource;
1051    struct agx_resource *rsrc = (struct agx_resource *)prsrc;
1052 
1053    if (trans->staging.rsrc && (transfer->usage & PIPE_MAP_WRITE)) {
1054       assert(prsrc->target != PIPE_BUFFER);
1055       agx_blit_from_staging(pctx, trans);
1056       agx_flush_readers(agx_context(pctx), agx_resource(trans->staging.rsrc),
1057                         "GPU write staging blit");
1058    } else if (trans->map && (transfer->usage & PIPE_MAP_WRITE)) {
1059       assert(
1060          ail_is_level_twiddled_uncompressed(&rsrc->layout, transfer->level));
1061 
1062       for (unsigned z = 0; z < transfer->box.depth; ++z) {
1063          uint8_t *map =
1064             agx_map_texture_cpu(rsrc, transfer->level, transfer->box.z + z);
1065          uint8_t *src = (uint8_t *)trans->map + transfer->layer_stride * z;
1066 
1067          ail_tile(map, src, &rsrc->layout, transfer->level, transfer->stride,
1068                   transfer->box.x, transfer->box.y, transfer->box.width,
1069                   transfer->box.height);
1070       }
1071    }
1072 
1073    /* The level we wrote is now initialized. We do this at the end so
1074     * blit_from_staging can avoid reloading existing contents.
1075     */
1076    if (transfer->usage & PIPE_MAP_WRITE)
1077       BITSET_SET(rsrc->data_valid, transfer->level);
1078 
1079    /* Free the transfer */
1080    free(trans->map);
1081    pipe_resource_reference(&trans->staging.rsrc, NULL);
1082    pipe_resource_reference(&transfer->resource, NULL);
1083    FREE(transfer);
1084 }
1085 
1086 /*
1087  * clear/copy
1088  */
1089 static void
1090 agx_clear(struct pipe_context *pctx, unsigned buffers,
1091           const struct pipe_scissor_state *scissor_state,
1092           const union pipe_color_union *color, double depth, unsigned stencil)
1093 {
1094    struct agx_context *ctx = agx_context(pctx);
1095    struct agx_batch *batch = agx_get_batch(ctx);
1096 
1097    if (unlikely(!agx_render_condition_check(ctx)))
1098       return;
1099 
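   /* Buffers not yet drawn or loaded in this batch can be cleared "for free"
    * by programming the batch's clear values (fast clear); anything already
    * drawn or loaded needs a fullscreen clear draw instead (slow clear).
    */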
1100    unsigned fastclear = buffers & ~(batch->draw | batch->load);
1101    unsigned slowclear = buffers & ~fastclear;
1102 
1103    assert(scissor_state == NULL && "we don't support PIPE_CAP_CLEAR_SCISSORED");
1104 
1105    /* Fast clears configure the batch */
1106    for (unsigned rt = 0; rt < PIPE_MAX_COLOR_BUFS; ++rt) {
1107       if (!(fastclear & (PIPE_CLEAR_COLOR0 << rt)))
1108          continue;
1109 
1110       static_assert(sizeof(color->f) == 16, "mismatched structure");
1111 
1112       /* Clear colour must be clamped to properly handle signed ints. */
1113       union pipe_color_union clamped =
1114          util_clamp_color(batch->key.cbufs[rt]->format, color);
1115 
1116       batch->uploaded_clear_color[rt] = agx_pool_upload_aligned(
1117          &batch->pool, clamped.f, sizeof(clamped.f), 16);
1118    }
1119 
1120    if (fastclear & PIPE_CLEAR_DEPTH)
1121       batch->clear_depth = depth;
1122 
1123    if (fastclear & PIPE_CLEAR_STENCIL)
1124       batch->clear_stencil = stencil;
1125 
1126    /* Slow clears draw a fullscreen rectangle */
1127    if (slowclear) {
1128       agx_blitter_save(ctx, ctx->blitter, false /* render cond */);
1129       util_blitter_clear(
1130          ctx->blitter, ctx->framebuffer.width, ctx->framebuffer.height,
1131          util_framebuffer_get_num_layers(&ctx->framebuffer), slowclear, color,
1132          depth, stencil,
1133          util_framebuffer_get_num_samples(&ctx->framebuffer) > 1);
1134    }
1135 
1136    if (fastclear)
1137       agx_batch_init_state(batch);
1138 
1139    batch->clear |= fastclear;
1140    batch->resolve |= buffers;
1141    assert((batch->draw & slowclear) == slowclear);
1142 }
1143 
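/* Reallocate a resource in place: create a replacement with the new template,
 * copy every valid level across, then swap the BO and layout underneath the
 * existing pipe_resource so outstanding references remain valid.
 */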
1144 static void
1145 transition_resource(struct pipe_context *pctx, struct agx_resource *rsrc,
1146                     struct pipe_resource *templ)
1147 {
1148    struct agx_resource *new_res =
1149       agx_resource(pctx->screen->resource_create(pctx->screen, templ));
1150 
1151    assert(new_res);
1152    assert(!(rsrc->base.bind & PIPE_BIND_SHARED) && "cannot swap BOs if shared");
1153 
1154    int level;
1155    BITSET_FOREACH_SET(level, rsrc->data_valid, PIPE_MAX_TEXTURE_LEVELS) {
1156       /* Copy each valid level */
1157       struct pipe_box box;
1158       u_box_3d(0, 0, 0, u_minify(rsrc->layout.width_px, level),
1159                u_minify(rsrc->layout.height_px, level),
1160                util_num_layers(&rsrc->base, level), &box);
1161 
1162       agx_resource_copy_region(pctx, &new_res->base, level, 0, 0, 0,
1163                                &rsrc->base, level, &box);
1164    }
1165 
1166    /* Flush the blits out, to make sure the old resource is no longer used */
1167    agx_flush_writer(agx_context(pctx), new_res, "flush_resource");
1168 
1169    /* Copy the bind flags and swap the BOs */
1170    struct agx_bo *old = rsrc->bo;
1171    rsrc->base.bind = new_res->base.bind;
1172    rsrc->layout = new_res->layout;
1173    rsrc->modifier = new_res->modifier;
1174    rsrc->bo = new_res->bo;
1175    new_res->bo = old;
1176 
1177    /* Free the new resource, which now owns the old BO */
1178    pipe_resource_reference((struct pipe_resource **)&new_res, NULL);
1179 }
1180 
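/* Transition a resource out of a compressed (or non-writeable-image) layout by
 * reallocating it with PIPE_BIND_SHADER_IMAGE set, which forces compression
 * off, and copying the contents over.
 */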
1181 void
1182 agx_decompress(struct agx_context *ctx, struct agx_resource *rsrc,
1183                const char *reason)
1184 {
1185    if (rsrc->layout.tiling == AIL_TILING_TWIDDLED_COMPRESSED) {
1186       perf_debug_ctx(ctx, "Decompressing resource due to %s", reason);
1187    } else if (!rsrc->layout.writeable_image) {
1188       perf_debug_ctx(ctx, "Reallocating image due to %s", reason);
1189    }
1190 
1191    struct pipe_resource templ = rsrc->base;
1192    assert(!(templ.bind & PIPE_BIND_SHADER_IMAGE) && "currently compressed");
1193    templ.bind |= PIPE_BIND_SHADER_IMAGE /* forces off compression */;
1194    transition_resource(&ctx->base, rsrc, &templ);
1195 }
1196 
1197 static void
1198 agx_flush_resource(struct pipe_context *pctx, struct pipe_resource *pres)
1199 {
1200    struct agx_resource *rsrc = agx_resource(pres);
1201 
1202    /* flush_resource is used to prepare resources for sharing, so if this is not
1203     * already a shareable resource, make it so.
1204     */
1205    struct agx_bo *old = rsrc->bo;
1206    if (!(old->flags & AGX_BO_SHAREABLE)) {
1207       assert(rsrc->layout.levels == 1 &&
1208              "Shared resources must not be mipmapped");
1209       assert(rsrc->layout.sample_count_sa == 1 &&
1210              "Shared resources must not be multisampled");
1211       assert(rsrc->bo);
1212       assert(!(pres->bind & PIPE_BIND_SHARED));
1213 
1214       struct pipe_resource templ = *pres;
1215       templ.bind |= PIPE_BIND_SHARED;
1216       transition_resource(pctx, rsrc, &templ);
1217    } else {
1218       /* Otherwise just claim it's already shared */
1219       pres->bind |= PIPE_BIND_SHARED;
1220       agx_flush_writer(agx_context(pctx), rsrc, "flush_resource");
1221    }
1222 }
1223 
1224 #define MAX_ATTACHMENTS 16
1225 
1226 struct attachments {
1227    struct drm_asahi_attachment list[MAX_ATTACHMENTS];
1228    size_t count;
1229 };
1230 
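/* Append a framebuffer attachment's backing BO (GPU address and size) to the
 * drm_asahi_attachment list handed to the kernel as part of the render
 * submission.
 */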
1231 static void
1232 asahi_add_attachment(struct attachments *att, struct agx_resource *rsrc,
1233                      struct pipe_surface *surf)
1234 {
1235    assert(att->count < MAX_ATTACHMENTS);
1236    int idx = att->count++;
1237 
1238    att->list[idx].size = rsrc->layout.size_B;
1239    att->list[idx].pointer = rsrc->bo->va->addr;
1240    att->list[idx].order = 1; // TODO: What does this do?
1241    att->list[idx].flags = 0;
1242 }
1243 
1244 static bool
1245 is_aligned(unsigned x, unsigned pot_alignment)
1246 {
1247    assert(util_is_power_of_two_nonzero(pot_alignment));
1248    return (x & (pot_alignment - 1)) == 0;
1249 }
1250 
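/* Fill out the drm_asahi_cmd_render structure for a batch: encoder pointers,
 * framebuffer dimensions, depth/stencil load/store configuration (including
 * compression metadata), background/EOT pipelines, and tilebuffer layout
 * parameters.
 */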
1251 static void
1252 agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,
1253            struct attachments *att, struct agx_pool *pool,
1254            struct agx_batch *batch, struct pipe_framebuffer_state *framebuffer,
1255            uint64_t encoder_ptr, uint64_t encoder_id, uint64_t cmd_ta_id,
1256            uint64_t cmd_3d_id, uint64_t scissor_ptr, uint64_t depth_bias_ptr,
1257            uint64_t visibility_result_ptr, struct asahi_bg_eot pipeline_clear,
1258            struct asahi_bg_eot pipeline_load,
1259            struct asahi_bg_eot pipeline_store, bool clear_pipeline_textures,
1260            double clear_depth, unsigned clear_stencil,
1261            struct agx_tilebuffer_layout *tib)
1262 {
1263    memset(c, 0, sizeof(*c));
1264 
1265    c->encoder_ptr = encoder_ptr;
1266    c->encoder_id = encoder_id;
1267    c->cmd_3d_id = cmd_3d_id;
1268    c->cmd_ta_id = cmd_ta_id;
1269 
1270    c->fragment_usc_base = dev->shader_base;
1271    c->vertex_usc_base = dev->shader_base;
1272 
1273    /* bit 0 specifies OpenGL clip behaviour. Since ARB_clip_control is
1274     * advertised, we don't set it and instead lower it in the vertex shader.
1275     */
1276    c->ppp_ctrl = 0x202;
1277 
1278    c->fb_width = framebuffer->width;
1279    c->fb_height = framebuffer->height;
1280 
1281    c->iogpu_unk_214 = 0xc000;
1282 
1283    c->isp_bgobjvals = 0x300;
1284 
1285    struct agx_resource *zres = NULL, *sres = NULL;
1286 
1287    agx_pack(&c->zls_ctrl, ZLS_CONTROL, zls_control) {
1288 
1289       if (framebuffer->zsbuf) {
1290          struct pipe_surface *zsbuf = framebuffer->zsbuf;
1291          struct agx_resource *zsres = agx_resource(zsbuf->texture);
1292 
1293          unsigned level = zsbuf->u.tex.level;
1294          unsigned first_layer = zsbuf->u.tex.first_layer;
1295 
1296          const struct util_format_description *desc = util_format_description(
1297             agx_resource(zsbuf->texture)->layout.format);
1298 
1299          assert(desc->format == PIPE_FORMAT_Z32_FLOAT ||
1300                 desc->format == PIPE_FORMAT_Z16_UNORM ||
1301                 desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
1302                 desc->format == PIPE_FORMAT_S8_UINT);
1303 
1304          c->depth_dimensions =
1305             (framebuffer->width - 1) | ((framebuffer->height - 1) << 15);
1306 
1307          if (util_format_has_depth(desc))
1308             zres = zsres;
1309          else
1310             sres = zsres;
1311 
1312          if (zsres->separate_stencil)
1313             sres = zsres->separate_stencil;
1314 
1315          if (zres) {
1316             bool clear = (batch->clear & PIPE_CLEAR_DEPTH);
1317             bool load = (batch->load & PIPE_CLEAR_DEPTH);
1318 
1319             zls_control.z_store_enable = (batch->resolve & PIPE_CLEAR_DEPTH);
1320             zls_control.z_load_enable = !clear && load;
1321 
1322             c->depth_buffer_load = agx_map_texture_gpu(zres, first_layer) +
1323                                    ail_get_level_offset_B(&zres->layout, level);
1324 
1325             c->depth_buffer_store = c->depth_buffer_load;
1326             c->depth_buffer_partial = c->depth_buffer_load;
1327 
1328             /* Main stride in pages */
1329             assert((zres->layout.depth_px == 1 ||
1330                     is_aligned(zres->layout.layer_stride_B, AIL_PAGESIZE)) &&
1331                    "Page aligned Z layers");
1332 
1333             unsigned stride_pages = zres->layout.layer_stride_B / AIL_PAGESIZE;
1334             c->depth_buffer_load_stride = ((stride_pages - 1) << 14) | 1;
1335             c->depth_buffer_store_stride = c->depth_buffer_load_stride;
1336             c->depth_buffer_partial_stride = c->depth_buffer_load_stride;
1337 
1338             assert(zres->layout.tiling != AIL_TILING_LINEAR && "must tile");
1339 
1340             if (ail_is_compressed(&zres->layout)) {
1341                c->depth_meta_buffer_load =
1342                   agx_map_texture_gpu(zres, 0) +
1343                   zres->layout.metadata_offset_B +
1344                   (first_layer * zres->layout.compression_layer_stride_B) +
1345                   zres->layout.level_offsets_compressed_B[level];
1346 
1347                /* Meta stride in cache lines */
1348                assert(is_aligned(zres->layout.compression_layer_stride_B,
1349                                  AIL_CACHELINE) &&
1350                       "Cacheline aligned Z meta layers");
1351                unsigned stride_lines =
1352                   zres->layout.compression_layer_stride_B / AIL_CACHELINE;
1353                c->depth_meta_buffer_load_stride = (stride_lines - 1) << 14;
1354 
1355                c->depth_meta_buffer_store = c->depth_meta_buffer_load;
1356                c->depth_meta_buffer_store_stride =
1357                   c->depth_meta_buffer_load_stride;
1358                c->depth_meta_buffer_partial = c->depth_meta_buffer_load;
1359                c->depth_meta_buffer_partial_stride =
1360                   c->depth_meta_buffer_load_stride;
1361 
1362                zls_control.z_compress_1 = true;
1363                zls_control.z_compress_2 = true;
1364             }
1365 
1366             if (zres->base.format == PIPE_FORMAT_Z16_UNORM) {
1367                const float scale = 0xffff;
1368                c->isp_bgobjdepth =
1369                   (uint16_t)(SATURATE(clear_depth) * scale + 0.5f);
1370                zls_control.z_format = AGX_ZLS_FORMAT_16;
1371                c->iogpu_unk_214 |= 0x40000;
1372             } else {
1373                c->isp_bgobjdepth = fui(clear_depth);
1374                zls_control.z_format = AGX_ZLS_FORMAT_32F;
1375             }
1376          }
1377 
1378          if (sres) {
1379             bool clear = (batch->clear & PIPE_CLEAR_STENCIL);
1380             bool load = (batch->load & PIPE_CLEAR_STENCIL);
1381 
1382             zls_control.s_store_enable = (batch->resolve & PIPE_CLEAR_STENCIL);
1383             zls_control.s_load_enable = !clear && load;
1384 
1385             c->stencil_buffer_load =
1386                agx_map_texture_gpu(sres, first_layer) +
1387                ail_get_level_offset_B(&sres->layout, level);
1388 
1389             c->stencil_buffer_store = c->stencil_buffer_load;
1390             c->stencil_buffer_partial = c->stencil_buffer_load;
1391 
1392             /* Main stride in pages */
1393             assert((sres->layout.depth_px == 1 ||
1394                     is_aligned(sres->layout.layer_stride_B, AIL_PAGESIZE)) &&
1395                    "Page aligned S layers");
1396             unsigned stride_pages = sres->layout.layer_stride_B / AIL_PAGESIZE;
1397             c->stencil_buffer_load_stride = ((stride_pages - 1) << 14) | 1;
1398             c->stencil_buffer_store_stride = c->stencil_buffer_load_stride;
1399             c->stencil_buffer_partial_stride = c->stencil_buffer_load_stride;
1400 
1401             if (ail_is_compressed(&sres->layout)) {
1402                c->stencil_meta_buffer_load =
1403                   agx_map_texture_gpu(sres, 0) +
1404                   sres->layout.metadata_offset_B +
1405                   (first_layer * sres->layout.compression_layer_stride_B) +
1406                   sres->layout.level_offsets_compressed_B[level];
1407 
1408                /* Meta stride in cache lines */
1409                assert(is_aligned(sres->layout.compression_layer_stride_B,
1410                                  AIL_CACHELINE) &&
1411                       "Cacheline aligned S meta layers");
1412                unsigned stride_lines =
1413                   sres->layout.compression_layer_stride_B / AIL_CACHELINE;
1414                c->stencil_meta_buffer_load_stride = (stride_lines - 1) << 14;
1415 
1416                c->stencil_meta_buffer_store = c->stencil_meta_buffer_load;
1417                c->stencil_meta_buffer_store_stride =
1418                   c->stencil_meta_buffer_load_stride;
1419                c->stencil_meta_buffer_partial = c->stencil_meta_buffer_load;
1420                c->stencil_meta_buffer_partial_stride =
1421                   c->stencil_meta_buffer_load_stride;
1422 
1423                zls_control.s_compress_1 = true;
1424                zls_control.s_compress_2 = true;
1425             }
1426 
1427             c->isp_bgobjvals |= clear_stencil;
1428          }
1429       }
1430    }
1431 
1432    if (clear_pipeline_textures)
1433       c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S;
1434    else
1435       c->flags |= ASAHI_RENDER_NO_CLEAR_PIPELINE_TEXTURES;
1436 
1437    if (zres && !(batch->clear & PIPE_CLEAR_DEPTH))
1438       c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S;
1439 
1440    if (sres && !(batch->clear & PIPE_CLEAR_STENCIL))
1441       c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S;
1442 
1443    if (dev->debug & AGX_DBG_NOCLUSTER)
1444       c->flags |= ASAHI_RENDER_NO_VERTEX_CLUSTERING;
1445 
1446    /* XXX is this for just MSAA+Z+S or MSAA+(Z|S)? */
1447    if (tib->nr_samples > 1 && framebuffer->zsbuf)
1448       c->flags |= ASAHI_RENDER_MSAA_ZS;
1449 
1450    memcpy(&c->load_pipeline_bind, &pipeline_clear.counts,
1451           sizeof(struct agx_counts_packed));
1452 
1453    memcpy(&c->store_pipeline_bind, &pipeline_store.counts,
1454           sizeof(struct agx_counts_packed));
1455 
1456    memcpy(&c->partial_reload_pipeline_bind, &pipeline_load.counts,
1457           sizeof(struct agx_counts_packed));
1458 
1459    memcpy(&c->partial_store_pipeline_bind, &pipeline_store.counts,
1460           sizeof(struct agx_counts_packed));
1461 
1462    /* XXX is this correct? */
1463    c->load_pipeline = pipeline_clear.usc | (framebuffer->nr_cbufs >= 4 ? 8 : 4);
1464    c->store_pipeline = pipeline_store.usc | 4;
1465    c->partial_reload_pipeline = pipeline_load.usc | 4;
1466    c->partial_store_pipeline = pipeline_store.usc | 4;
1467 
1468    c->utile_width = tib->tile_size.width;
1469    c->utile_height = tib->tile_size.height;
1470 
1471    c->samples = tib->nr_samples;
1472    c->layers = MAX2(util_framebuffer_get_num_layers(framebuffer), 1);
1473 
1474    c->ppp_multisamplectl = batch->uniforms.ppp_multisamplectl;
1475    c->sample_size = tib->sample_size_B;
1476 
1477    /* XXX OR 0x80 with eMRT? */
1478    c->tib_blocks = ALIGN_POT(agx_tilebuffer_total_size(tib), 2048) / 2048;
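   /* For example (illustrative size): a 5000-byte tilebuffer rounds up to
    * 6144 bytes, giving tib_blocks = 3.
    */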
1479 
1480    float tan_60 = 1.732051f;
1481    c->merge_upper_x = fui(tan_60 / framebuffer->width);
1482    c->merge_upper_y = fui(tan_60 / framebuffer->height);
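   /* For example (illustrative dimensions): a 1920x1080 framebuffer gives
    * merge_upper_x = fui(1.732051 / 1920) ~ fui(9.02e-4) and
    * merge_upper_y = fui(1.732051 / 1080) ~ fui(1.60e-3).
    */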
1483 
1484    c->scissor_array = scissor_ptr;
1485    c->depth_bias_array = depth_bias_ptr;
1486    c->visibility_result_buffer = visibility_result_ptr;
1487 
1488    c->vertex_sampler_array =
1489       batch->sampler_heap.bo ? batch->sampler_heap.bo->va->addr : 0;
1490    c->vertex_sampler_count = batch->sampler_heap.count;
1491    c->vertex_sampler_max = batch->sampler_heap.count + 1;
1492 
1493    /* In the future we could split the heaps if useful */
1494    c->fragment_sampler_array = c->vertex_sampler_array;
1495    c->fragment_sampler_count = c->vertex_sampler_count;
1496    c->fragment_sampler_max = c->vertex_sampler_max;
1497 
1498    /* If a tile is empty, we do not want to process it, as the redundant
1499     * roundtrip of memory-->tilebuffer-->memory wastes a tremendous amount of
1500     * memory bandwidth. Any draw marks a tile as non-empty, so we only need to
1501     * process empty tiles if the background+EOT programs have a side effect.
1502     * This is the case exactly when some attachment is both cleared and
1503     * resolved, i.e. the clear and resolve sets have a non-empty intersection.
1504     *
1505     * This case matters a LOT for performance in workloads that split batches.
1506     */
1507    if (batch->clear & batch->resolve)
1508       c->flags |= ASAHI_RENDER_PROCESS_EMPTY_TILES;
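   /* For instance, a batch that both clears and resolves color attachment 0
    * has PIPE_CLEAR_COLOR0 set in clear & resolve, so empty tiles must still
    * run the background/EOT programs to write out the clear color.
    */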
1509 
1510    for (unsigned i = 0; i < framebuffer->nr_cbufs; ++i) {
1511       if (!framebuffer->cbufs[i])
1512          continue;
1513 
1514       asahi_add_attachment(att, agx_resource(framebuffer->cbufs[i]->texture),
1515                            framebuffer->cbufs[i]);
1516    }
1517 
1518    if (framebuffer->zsbuf) {
1519       struct agx_resource *rsrc = agx_resource(framebuffer->zsbuf->texture);
1520 
1521       asahi_add_attachment(att, rsrc, framebuffer->zsbuf);
1522 
1523       if (rsrc->separate_stencil) {
1524          asahi_add_attachment(att, rsrc->separate_stencil, framebuffer->zsbuf);
1525       }
1526    }
1527 
1528    c->fragment_attachments = (uint64_t)(uintptr_t)&att->list[0];
1529    c->fragment_attachment_count = att->count;
1530 
1531    if (batch->vs_scratch) {
1532       c->flags |= ASAHI_RENDER_VERTEX_SPILLS;
1533       c->vertex_helper_arg = batch->ctx->scratch_vs.buf->va->addr;
1534       c->vertex_helper_cfg = batch->vs_preamble_scratch << 16;
1535       c->vertex_helper_program = dev->helper->va->addr | 1;
1536    }
1537    if (batch->fs_scratch) {
1538       c->fragment_helper_arg = batch->ctx->scratch_fs.buf->va->addr;
1539       c->fragment_helper_cfg = batch->fs_preamble_scratch << 16;
1540       c->fragment_helper_program = dev->helper->va->addr | 1;
1541    }
1542 }
1543 
1544 /*
1545  * context
1546  */
1547 static void
1548 agx_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
1549           unsigned flags)
1550 {
1551    struct agx_context *ctx = agx_context(pctx);
1552    struct agx_screen *screen = agx_screen(ctx->base.screen);
1553 
1554    agx_flush_all(ctx, "Gallium flush");
1555 
1556    if (!(flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC)) &&
1557        ctx->flush_last_seqid) {
1558       /* Ensure other contexts in this screen serialize against the last
1559        * submission (and all prior submissions).
1560        */
1561       simple_mtx_lock(&screen->flush_seqid_lock);
1562 
1563       uint64_t val = p_atomic_read(&screen->flush_wait_seqid);
1564       if (val < ctx->flush_last_seqid)
1565          p_atomic_set(&screen->flush_wait_seqid, ctx->flush_last_seqid);
1566 
1567       /* Note: it's possible for the max() logic above to be "wrong" due
1568        * to a race in agx_batch_submit causing out-of-order timeline point
1569        * updates, making the larger value not actually a later submission.
1570        * However, see the comment in agx_batch.c for why this doesn't matter
1571        * because this corner case is handled conservatively in the kernel.
1572        */
1573 
1574       simple_mtx_unlock(&screen->flush_seqid_lock);
1575 
1576       /* Optimization: Avoid serializing against our own queue by
1577        * recording the last seen foreign seqid when flushing, and our own
1578        * flush seqid. If we then try to sync against our own seqid, we'll
1579        * instead sync against the last possible foreign one. This is *not*
1580        * the `val` we got above, because another context might flush with a
1581        * seqid between `val` and `flush_last_seqid` (which would not update
1582        * `flush_wait_seqid` per the logic above). This is somewhat
1583        * conservative: it means that if *any* foreign context flushes, then
1584        * on next flush of this context we will start waiting for *all*
1585        * prior submits on *all* contexts (even if unflushed) at that point,
1586        * including any local submissions prior to the latest one. That's
1587        * probably fine, it creates a one-time "wait for the second-previous
1588        * batch" wait on this queue but that still allows for at least
1589        * the previous batch to pipeline on the GPU and it's one-time
1590        * until another foreign flush happens. Phew.
1591        */
1592       if (val && val != ctx->flush_my_seqid)
1593          ctx->flush_other_seqid = ctx->flush_last_seqid - 1;
1594 
1595       ctx->flush_my_seqid = ctx->flush_last_seqid;
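      /* Illustrative sequence (hypothetical seqids): context A flushes at
       * seqid 10 (flush_my_seqid = 10, flush_wait_seqid = 10). Context B then
       * flushes at seqid 12, bumping flush_wait_seqid to 12. When A next
       * flushes at seqid 15, it reads val = 12 != 10, so it records
       * flush_other_seqid = 14 and flush_my_seqid = 15: a later sync against
       * A's own work instead waits up to seqid 14, covering B's submission.
       */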
1596    }
1597 
1598    /* At this point all pending work has been submitted. Since jobs are
1599     * started and completed sequentially from a UAPI perspective, and since
1600     * we submit all jobs with compute+render barriers on the prior job,
1601     * waiting on the last submitted job is sufficient to guarantee completion
1602     * of all GPU work thus far, so we can create a fence out of the latest
1603     * syncobj.
1604     *
1605     * See this page for more info on how the GPU/UAPI queueing works:
1606     * https://github.com/AsahiLinux/docs/wiki/SW:AGX-driver-notes#queues
1607     */
1608 
1609    if (fence) {
1610       struct pipe_fence_handle *f = agx_fence_create(ctx);
1611       pctx->screen->fence_reference(pctx->screen, fence, NULL);
1612       *fence = f;
1613    }
1614 }
1615 
1616 static void
1617 agx_flush_compute(struct agx_context *ctx, struct agx_batch *batch,
1618                   struct drm_asahi_cmd_compute *cmdbuf)
1619 {
1620    struct agx_device *dev = agx_device(ctx->base.screen);
1621 
1622    /* Finalize the encoder */
1623    agx_pack(batch->cdm.current, CDM_STREAM_TERMINATE, _)
1624       ;
1625 
1626    agx_batch_add_bo(batch, batch->cdm.bo);
1627 
1628    if (batch->cs_scratch)
1629       agx_batch_add_bo(batch, ctx->scratch_cs.buf);
1630 
1631    unsigned cmdbuf_id = agx_get_global_id(dev);
1632    unsigned encoder_id = agx_get_global_id(dev);
1633 
1634    *cmdbuf = (struct drm_asahi_cmd_compute){
1635       .flags = 0,
1636       .encoder_ptr = batch->cdm.bo->va->addr,
1637       .encoder_end = batch->cdm.bo->va->addr +
1638                      (batch->cdm.current - (uint8_t *)batch->cdm.bo->map),
1639       .usc_base = dev->shader_base,
1640       .helper_arg = 0,
1641       .helper_cfg = 0,
1642       .helper_program = 0,
1643       .iogpu_unk_40 = 0,
1644       .sampler_array =
1645          batch->sampler_heap.bo ? batch->sampler_heap.bo->va->addr : 0,
1646       .sampler_count = batch->sampler_heap.count,
1647       .sampler_max = batch->sampler_heap.count + 1,
1648       .encoder_id = encoder_id,
1649       .cmd_id = cmdbuf_id,
1650       .unk_mask = 0xffffffff,
1651    };
1652 
1653    if (batch->cs_scratch) {
1654       // The commented out lines *may* be related to subgroup-level preemption,
1655       // which we can't support without implementing threadgroup memory in the
1656       // helper. Disable them for now.
1657 
1658       // cmdbuf->iogpu_unk_40 = 0x1c;
1659       cmdbuf->helper_arg = ctx->scratch_cs.buf->va->addr;
1660       cmdbuf->helper_cfg = batch->cs_preamble_scratch << 16;
1661       // cmdbuf->helper_cfg |= 0x40;
1662       cmdbuf->helper_program = dev->helper->va->addr | 1;
1663    }
1664 }
1665 
1666 static void
1667 agx_flush_render(struct agx_context *ctx, struct agx_batch *batch,
1668                  struct drm_asahi_cmd_render *cmdbuf, struct attachments *att)
1669 {
1670    struct agx_device *dev = agx_device(ctx->base.screen);
1671 
1672    if (batch->vs_scratch)
1673       agx_batch_add_bo(batch, ctx->scratch_vs.buf);
1674    if (batch->fs_scratch)
1675       agx_batch_add_bo(batch, ctx->scratch_fs.buf);
1676 
1677    assert(batch->initialized);
1678 
1679    /* Finalize the encoder */
1680    uint8_t stop[5 + 64] = {0x00, 0x00, 0x00, 0xc0, 0x00};
1681    memcpy(batch->vdm.current, stop, sizeof(stop));
1682 
1683    struct asahi_bg_eot pipeline_background =
1684       agx_build_bg_eot(batch, false, false);
1685 
1686    struct asahi_bg_eot pipeline_background_partial =
1687       agx_build_bg_eot(batch, false, true);
1688 
1689    struct asahi_bg_eot pipeline_store = agx_build_bg_eot(batch, true, false);
1690 
1691    bool clear_pipeline_textures =
1692       agx_tilebuffer_spills(&batch->tilebuffer_layout);
1693 
1694    for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
1695       struct pipe_surface *surf = batch->key.cbufs[i];
1696 
1697       clear_pipeline_textures |=
1698          surf && surf->texture && !(batch->clear & (PIPE_CLEAR_COLOR0 << i));
1699    }
1700 
1701    /* Scissor and depth bias arrays are staged to dynamic arrays on the CPU. At
1702     * submit time, they're done growing and are uploaded to GPU memory attached
1703     * to the batch.
1704     */
1705    uint64_t scissor = agx_pool_upload_aligned(&batch->pool, batch->scissor.data,
1706                                               batch->scissor.size, 64);
1707    uint64_t zbias = agx_pool_upload_aligned(
1708       &batch->pool, batch->depth_bias.data, batch->depth_bias.size, 64);
1709 
1710    /* BO list for a given batch consists of:
1711     *  - BOs for the batch's pools
1712     *  - BOs for the encoder
1713     *  - BO for internal shaders
1714     *  - BOs added to the batch explicitly
1715     */
1716    agx_batch_add_bo(batch, batch->vdm.bo);
1717 
1718    unsigned cmd_ta_id = agx_get_global_id(dev);
1719    unsigned cmd_3d_id = agx_get_global_id(dev);
1720    unsigned encoder_id = agx_get_global_id(dev);
1721 
1722    agx_cmdbuf(dev, cmdbuf, att, &batch->pool, batch, &batch->key,
1723               batch->vdm.bo->va->addr, encoder_id, cmd_ta_id, cmd_3d_id,
1724               scissor, zbias, agx_get_occlusion_heap(batch),
1725               pipeline_background, pipeline_background_partial, pipeline_store,
1726               clear_pipeline_textures, batch->clear_depth, batch->clear_stencil,
1727               &batch->tilebuffer_layout);
1728 }
1729 
1730 void
1731 agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
1732 {
1733    assert(agx_batch_is_active(batch));
1734    assert(!agx_batch_is_submitted(batch));
1735 
1736    struct attachments att = {.count = 0};
1737    struct drm_asahi_cmd_render render;
1738    struct drm_asahi_cmd_compute compute;
1739    bool has_vdm = false, has_cdm = false;
1740 
1741    if (batch->cdm.bo) {
1742       agx_flush_compute(ctx, batch, &compute);
1743       has_cdm = true;
1744    }
1745 
1746    if (batch->vdm.bo && (batch->clear || batch->initialized)) {
1747       agx_flush_render(ctx, batch, &render, &att);
1748       has_vdm = true;
1749    }
1750 
1751    if (!has_cdm && !has_vdm) {
1752       agx_batch_reset(ctx, batch);
1753       return;
1754    }
1755 
1756    agx_batch_submit(ctx, batch, has_cdm ? &compute : NULL,
1757                     has_vdm ? &render : NULL);
1758 }
1759 
1760 static void
1761 agx_destroy_context(struct pipe_context *pctx)
1762 {
1763    struct agx_device *dev = agx_device(pctx->screen);
1764    struct agx_context *ctx = agx_context(pctx);
1765    struct agx_screen *screen = agx_screen(pctx->screen);
1766 
1767    /* Batch state needs to be freed on completion, and we don't want to yank
1768     * buffers out from in-progress GPU jobs to avoid faults, so just wait until
1769     * everything in progress is actually done on context destroy. This will
1770     * ensure everything is cleaned up properly.
1771     */
1772    agx_sync_all(ctx, "destroy context");
1773 
1774    if (pctx->stream_uploader)
1775       u_upload_destroy(pctx->stream_uploader);
1776 
1777    if (ctx->blitter)
1778       util_blitter_destroy(ctx->blitter);
1779 
1780    util_unreference_framebuffer_state(&ctx->framebuffer);
1781 
1782    agx_bg_eot_cleanup(&ctx->bg_eot);
1783    agx_destroy_meta_shaders(ctx);
1784 
1785    agx_bo_unreference(dev, ctx->result_buf);
1786 
1787    /* Lock around the syncobj destruction, to avoid racing
1788     * command submission in another context.
1789     */
1790    u_rwlock_wrlock(&screen->destroy_lock);
1791 
1792    drmSyncobjDestroy(dev->fd, ctx->in_sync_obj);
1793    drmSyncobjDestroy(dev->fd, ctx->dummy_syncobj);
1794    if (ctx->in_sync_fd != -1)
1795       close(ctx->in_sync_fd);
1796 
1797    for (unsigned i = 0; i < AGX_MAX_BATCHES; ++i) {
1798       if (ctx->batches.slots[i].syncobj)
1799          drmSyncobjDestroy(dev->fd, ctx->batches.slots[i].syncobj);
1800    }
1801 
1802    u_rwlock_wrunlock(&screen->destroy_lock);
1803 
1804    pipe_resource_reference(&ctx->heap, NULL);
1805 
1806    agx_scratch_fini(&ctx->scratch_vs);
1807    agx_scratch_fini(&ctx->scratch_fs);
1808    agx_scratch_fini(&ctx->scratch_cs);
1809 
1810    agx_destroy_command_queue(dev, ctx->queue_id);
1811 
1812    ralloc_free(ctx);
1813 }
1814 
1815 static void
1816 agx_invalidate_resource(struct pipe_context *pctx,
1817                         struct pipe_resource *resource)
1818 {
1819    struct agx_context *ctx = agx_context(pctx);
1820    struct agx_batch *batch = agx_get_batch(ctx);
1821 
1822    /* Handle the glInvalidateFramebuffer case */
1823    if (batch->key.zsbuf && batch->key.zsbuf->texture == resource)
1824       batch->resolve &= ~PIPE_CLEAR_DEPTHSTENCIL;
1825 
1826    for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
1827       struct pipe_surface *surf = batch->key.cbufs[i];
1828 
1829       if (surf && surf->texture == resource)
1830          batch->resolve &= ~(PIPE_CLEAR_COLOR0 << i);
1831    }
1832 }
1833 
1834 static enum pipe_reset_status
1835 asahi_get_device_reset_status(struct pipe_context *pipe)
1836 {
1837    struct agx_context *ctx = agx_context(pipe);
1838 
1839    return ctx->any_faults ? PIPE_GUILTY_CONTEXT_RESET : PIPE_NO_RESET;
1840 }
1841 
1842 static struct pipe_context *
1843 agx_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
1844 {
1845    struct agx_context *ctx = rzalloc(NULL, struct agx_context);
1846    if (!ctx)
1847       return NULL;
1848 
1849    struct pipe_context *pctx = &ctx->base;
1850    int ret;
1851 
1852    pctx->screen = screen;
1853    pctx->priv = priv;
1854 
1855    util_dynarray_init(&ctx->writer, ctx);
1856    util_dynarray_init(&ctx->global_buffers, ctx);
1857 
1858    pctx->stream_uploader = u_upload_create_default(pctx);
1859    if (!pctx->stream_uploader) {
1860       FREE(pctx);
1861       return NULL;
1862    }
1863    pctx->const_uploader = pctx->stream_uploader;
1864 
1865    uint32_t priority = 2;
1866    if (flags & PIPE_CONTEXT_PRIORITY_LOW)
1867       priority = 3;
1868    else if (flags & PIPE_CONTEXT_PRIORITY_MEDIUM)
1869       priority = 2;
1870    else if (flags & PIPE_CONTEXT_PRIORITY_HIGH)
1871       priority = 1;
1872 
1873    ctx->queue_id = agx_create_command_queue(agx_device(screen),
1874                                             DRM_ASAHI_QUEUE_CAP_RENDER |
1875                                                DRM_ASAHI_QUEUE_CAP_BLIT |
1876                                                DRM_ASAHI_QUEUE_CAP_COMPUTE,
1877                                             priority);
1878 
1879    pctx->destroy = agx_destroy_context;
1880    pctx->flush = agx_flush;
1881    pctx->clear = agx_clear;
1882    pctx->resource_copy_region = agx_resource_copy_region;
1883    pctx->blit = agx_blit;
1884    pctx->flush_resource = agx_flush_resource;
1885 
1886    pctx->buffer_map = u_transfer_helper_transfer_map;
1887    pctx->buffer_unmap = u_transfer_helper_transfer_unmap;
1888    pctx->texture_map = u_transfer_helper_transfer_map;
1889    pctx->texture_unmap = u_transfer_helper_transfer_unmap;
1890    pctx->transfer_flush_region = u_transfer_helper_transfer_flush_region;
1891 
1892    pctx->buffer_subdata = u_default_buffer_subdata;
1893    pctx->clear_buffer = u_default_clear_buffer;
1894    pctx->texture_subdata = u_default_texture_subdata;
1895    pctx->set_debug_callback = u_default_set_debug_callback;
1896    pctx->get_sample_position = u_default_get_sample_position;
1897    pctx->invalidate_resource = agx_invalidate_resource;
1898    pctx->memory_barrier = agx_memory_barrier;
1899 
1900    pctx->create_fence_fd = agx_create_fence_fd;
1901    pctx->fence_server_sync = agx_fence_server_sync;
1902 
1903    pctx->get_device_reset_status = asahi_get_device_reset_status;
1904 
1905    agx_init_state_functions(pctx);
1906    agx_init_query_functions(pctx);
1907    agx_init_streamout_functions(pctx);
1908 
1909    agx_bg_eot_init(&ctx->bg_eot, agx_device(screen));
1910    agx_init_meta_shaders(ctx);
1911 
1912    ctx->blitter = util_blitter_create(pctx);
1913    ctx->compute_blitter.blit_cs = asahi_blit_key_table_create(ctx);
1914 
1915    ctx->result_buf =
1916       agx_bo_create(agx_device(screen),
1917                     (2 * sizeof(union agx_batch_result)) * AGX_MAX_BATCHES, 0,
1918                     AGX_BO_WRITEBACK, "Batch result buffer");
1919    assert(ctx->result_buf);
1920 
1921    /* Sync object/FD used for NATIVE_FENCE_FD. */
1922    ctx->in_sync_fd = -1;
1923    ret = drmSyncobjCreate(agx_device(screen)->fd, 0, &ctx->in_sync_obj);
1924    assert(!ret);
1925 
1926    /* Dummy sync object used before any work has been submitted. */
1927    ret = drmSyncobjCreate(agx_device(screen)->fd, DRM_SYNCOBJ_CREATE_SIGNALED,
1928                           &ctx->dummy_syncobj);
1929    assert(!ret);
1930    ctx->syncobj = ctx->dummy_syncobj;
1931 
1932    /* By default all samples are enabled */
1933    ctx->sample_mask = ~0;
1934 
1935    ctx->support_lod_bias = !(flags & PIPE_CONTEXT_NO_LOD_BIAS);
1936    ctx->robust = (flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS);
1937 
1938    agx_scratch_init(agx_device(screen), &ctx->scratch_vs);
1939    agx_scratch_init(agx_device(screen), &ctx->scratch_fs);
1940    agx_scratch_init(agx_device(screen), &ctx->scratch_cs);
1941 
1942    return pctx;
1943 }
1944 
1945 static const char *
1946 agx_get_vendor(struct pipe_screen *pscreen)
1947 {
1948    return "Mesa";
1949 }
1950 
1951 static const char *
1952 agx_get_device_vendor(struct pipe_screen *pscreen)
1953 {
1954    return "Apple";
1955 }
1956 
1957 static const char *
1958 agx_get_name(struct pipe_screen *pscreen)
1959 {
1960    struct agx_device *dev = agx_device(pscreen);
1961 
1962    return dev->name;
1963 }
1964 
1965 static void
1966 agx_query_memory_info(struct pipe_screen *pscreen,
1967                       struct pipe_memory_info *info)
1968 {
1969    uint64_t mem_B = 0;
1970    os_get_total_physical_memory(&mem_B);
1971 
1972    uint64_t mem_kB = mem_B / 1024;
1973 
1974    *info = (struct pipe_memory_info){
1975       .total_device_memory = mem_kB,
1976       .avail_device_memory = mem_kB,
1977    };
1978 }
1979 
1980 static int
1981 agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
1982 {
1983    struct agx_device *dev = agx_device(pscreen);
1984 
1985    switch (param) {
1986    case PIPE_CAP_CLIP_HALFZ:
1987    case PIPE_CAP_NPOT_TEXTURES:
1988    case PIPE_CAP_SHADER_STENCIL_EXPORT:
1989    case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
1990    case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
1991    case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
1992    case PIPE_CAP_DEPTH_CLIP_DISABLE:
1993    case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
1994    case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
1995    case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
1996    case PIPE_CAP_SHADER_PACK_HALF_FLOAT:
1997    case PIPE_CAP_FS_FINE_DERIVATIVE:
1998    case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS:
1999    case PIPE_CAP_DOUBLES:
2000       return 1;
2001 
2002    case PIPE_CAP_MAX_RENDER_TARGETS:
2003    case PIPE_CAP_FBFETCH:
2004    case PIPE_CAP_FBFETCH_COHERENT:
2005       return 8;
2006    case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
2007       return 1;
2008 
2009    case PIPE_CAP_OCCLUSION_QUERY:
2010    case PIPE_CAP_QUERY_TIMESTAMP:
2011    case PIPE_CAP_QUERY_TIME_ELAPSED:
2012    case PIPE_CAP_QUERY_SO_OVERFLOW:
2013    case PIPE_CAP_QUERY_MEMORY_INFO:
2014    case PIPE_CAP_PRIMITIVE_RESTART:
2015    case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:
2016    case PIPE_CAP_ANISOTROPIC_FILTER:
2017    case PIPE_CAP_NATIVE_FENCE_FD:
2018    case PIPE_CAP_TEXTURE_BARRIER:
2019       return true;
2020 
2021    case PIPE_CAP_TIMER_RESOLUTION:
2022       /* Timer resolution is the length of a single tick in nanos */
2023       return agx_gpu_time_to_ns(dev, 1);
2024 
2025    case PIPE_CAP_SAMPLER_VIEW_TARGET:
2026    case PIPE_CAP_TEXTURE_SWIZZLE:
2027    case PIPE_CAP_BLEND_EQUATION_SEPARATE:
2028    case PIPE_CAP_INDEP_BLEND_ENABLE:
2029    case PIPE_CAP_INDEP_BLEND_FUNC:
2030    case PIPE_CAP_ACCELERATED:
2031    case PIPE_CAP_UMA:
2032    case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
2033    case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
2034    case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:
2035    case PIPE_CAP_SHADER_ARRAY_COMPONENTS:
2036    case PIPE_CAP_PACKED_UNIFORMS:
2037    case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
2038    case PIPE_CAP_VS_INSTANCEID:
2039    case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
2040    case PIPE_CAP_CONDITIONAL_RENDER:
2041    case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
2042    case PIPE_CAP_SEAMLESS_CUBE_MAP:
2043    case PIPE_CAP_LOAD_CONSTBUF:
2044    case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
2045    case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
2046    case PIPE_CAP_NULL_TEXTURES:
2047    case PIPE_CAP_TEXTURE_MULTISAMPLE:
2048    case PIPE_CAP_IMAGE_LOAD_FORMATTED:
2049    case PIPE_CAP_IMAGE_STORE_FORMATTED:
2050    case PIPE_CAP_COMPUTE:
2051    case PIPE_CAP_INT64:
2052    case PIPE_CAP_SAMPLE_SHADING:
2053    case PIPE_CAP_START_INSTANCE:
2054    case PIPE_CAP_DRAW_PARAMETERS:
2055    case PIPE_CAP_MULTI_DRAW_INDIRECT:
2056    case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
2057    case PIPE_CAP_CULL_DISTANCE:
2058    case PIPE_CAP_GL_SPIRV:
2059    case PIPE_CAP_POLYGON_OFFSET_CLAMP:
2060       return 1;
2061    case PIPE_CAP_SURFACE_SAMPLE_COUNT:
2062       /* TODO: MSRTT */
2063       return 0;
2064 
2065    case PIPE_CAP_CUBE_MAP_ARRAY:
2066       return 1;
2067 
2068    case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
2069       return 1;
2070 
2071    case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
2072       return PIPE_MAX_SO_BUFFERS;
2073 
2074    case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
2075    case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
2076       return PIPE_MAX_SO_OUTPUTS;
2077 
2078    case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
2079    case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
2080       return 1;
2081 
2082    case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
2083       return 2048;
2084 
2085    case PIPE_CAP_GLSL_FEATURE_LEVEL:
2086    case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:
2087       return 460;
2088    case PIPE_CAP_ESSL_FEATURE_LEVEL:
2089       return 320;
2090 
2091    /* Settings from iris, may need tuning */
2092    case PIPE_CAP_MAX_VERTEX_STREAMS:
2093       return 4;
2094    case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
2095       return 256;
2096    case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
2097       return 1024;
2098    case PIPE_CAP_MAX_GS_INVOCATIONS:
2099       return 32;
2100    case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
2101       return 16;
2102 
2103    case PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT:
2104       return AGX_TEXTURE_BUFFER_MAX_SIZE;
2105 
2106    case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
2107       return 64;
2108 
2109    case PIPE_CAP_VERTEX_ATTRIB_ELEMENT_ALIGNED_ONLY:
2110       return 1;
2111 
2112    case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE:
2113       return true;
2114 
2115    case PIPE_CAP_MAX_TEXTURE_2D_SIZE:
2116       return 16384;
2117    case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
2118       /* Max 16384x16384 */
2119       return 15;
2120    case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
2121       /* Max 2048x2048x2048 */
2122       return 12;
2123 
2124    case PIPE_CAP_FS_COORD_ORIGIN_UPPER_LEFT:
2125    case PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER:
2126    case PIPE_CAP_TGSI_TEXCOORD:
2127    case PIPE_CAP_FS_FACE_IS_INTEGER_SYSVAL:
2128    case PIPE_CAP_FS_POSITION_IS_SYSVAL:
2129       return true;
2130    case PIPE_CAP_FS_COORD_ORIGIN_LOWER_LEFT:
2131    case PIPE_CAP_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
2132    case PIPE_CAP_FS_POINT_IS_SYSVAL:
2133       return false;
2134 
2135    case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET:
2136       return 0xffff;
2137 
2138    case PIPE_CAP_TEXTURE_TRANSFER_MODES:
2139       return PIPE_TEXTURE_TRANSFER_BLIT;
2140 
2141    case PIPE_CAP_ENDIANNESS:
2142       return PIPE_ENDIAN_LITTLE;
2143 
2144    case PIPE_CAP_SHADER_GROUP_VOTE:
2145    case PIPE_CAP_SHADER_BALLOT:
2146       return true;
2147 
2148    case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
2149       return 4;
2150    case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
2151       return -8;
2152    case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
2153       return 7;
2154    case PIPE_CAP_DRAW_INDIRECT:
2155    case PIPE_CAP_TEXTURE_QUERY_SAMPLES:
2156    case PIPE_CAP_TEXTURE_QUERY_LOD:
2157    case PIPE_CAP_TEXTURE_SHADOW_LOD:
2158       return true;
2159 
2160    case PIPE_CAP_MAX_VIEWPORTS:
2161       return AGX_MAX_VIEWPORTS;
2162 
2163    case PIPE_CAP_VIDEO_MEMORY: {
2164       uint64_t system_memory;
2165 
2166       if (!os_get_total_physical_memory(&system_memory))
2167          return 0;
2168 
2169       return (int)(system_memory >> 20);
2170    }
2171 
2172    case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
2173    case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
2174       return true;
2175 
2176    case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
2177       return 4;
2178 
2179    case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
2180       return 32;
2181    case PIPE_CAP_MAX_VARYINGS:
2182       /* TODO: Probably should bump to 32? */
2183       return 16;
2184 
2185    case PIPE_CAP_FLATSHADE:
2186    case PIPE_CAP_TWO_SIDED_COLOR:
2187    case PIPE_CAP_ALPHA_TEST:
2188    case PIPE_CAP_CLIP_PLANES:
2189    case PIPE_CAP_NIR_IMAGES_AS_DEREF:
2190       return 0;
2191 
2192    case PIPE_CAP_QUERY_BUFFER_OBJECT:
2193       return true;
2194 
2195    case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
2196       return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_FREEDRENO;
2197 
2198    case PIPE_CAP_SUPPORTED_PRIM_MODES:
2199    case PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART:
2200       return BITFIELD_BIT(MESA_PRIM_POINTS) | BITFIELD_BIT(MESA_PRIM_LINES) |
2201              BITFIELD_BIT(MESA_PRIM_LINE_STRIP) |
2202              BITFIELD_BIT(MESA_PRIM_LINE_LOOP) |
2203              BITFIELD_BIT(MESA_PRIM_TRIANGLES) |
2204              BITFIELD_BIT(MESA_PRIM_TRIANGLE_STRIP) |
2205              BITFIELD_BIT(MESA_PRIM_TRIANGLE_FAN) |
2206              BITFIELD_BIT(MESA_PRIM_LINES_ADJACENCY) |
2207              BITFIELD_BIT(MESA_PRIM_LINE_STRIP_ADJACENCY) |
2208              BITFIELD_BIT(MESA_PRIM_TRIANGLES_ADJACENCY) |
2209              BITFIELD_BIT(MESA_PRIM_TRIANGLE_STRIP_ADJACENCY) |
2210              BITFIELD_BIT(MESA_PRIM_PATCHES);
2211 
2212    case PIPE_CAP_MAP_UNSYNCHRONIZED_THREAD_SAFE:
2213       return 1;
2214 
2215    case PIPE_CAP_VS_LAYER_VIEWPORT:
2216    case PIPE_CAP_TES_LAYER_VIEWPORT:
2217       return true;
2218 
2219    case PIPE_CAP_CONTEXT_PRIORITY_MASK:
2220       return PIPE_CONTEXT_PRIORITY_LOW | PIPE_CONTEXT_PRIORITY_MEDIUM |
2221              PIPE_CONTEXT_PRIORITY_HIGH;
2222 
2223    default:
2224       return u_pipe_screen_get_param_defaults(pscreen, param);
2225    }
2226 }
2227 
2228 static float
2229 agx_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
2230 {
2231    switch (param) {
2232    case PIPE_CAPF_MIN_LINE_WIDTH:
2233    case PIPE_CAPF_MIN_LINE_WIDTH_AA:
2234    case PIPE_CAPF_MIN_POINT_SIZE:
2235    case PIPE_CAPF_MIN_POINT_SIZE_AA:
2236       return 1;
2237 
2238    case PIPE_CAPF_POINT_SIZE_GRANULARITY:
2239    case PIPE_CAPF_LINE_WIDTH_GRANULARITY:
2240       return 0.1;
2241 
2242    case PIPE_CAPF_MAX_LINE_WIDTH:
2243    case PIPE_CAPF_MAX_LINE_WIDTH_AA:
2244       return 16.0; /* Off-by-one fixed point 4:4 encoding */
2245 
2246    case PIPE_CAPF_MAX_POINT_SIZE:
2247    case PIPE_CAPF_MAX_POINT_SIZE_AA:
2248       return 511.95f;
2249 
2250    case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
2251       return 16.0;
2252 
2253    case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
2254       return 16.0; /* arbitrary */
2255 
2256    case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:
2257    case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:
2258    case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:
2259       return 0.0f;
2260 
2261    default:
2262       debug_printf("Unexpected PIPE_CAPF %d query\n", param);
2263       return 0.0;
2264    }
2265 }
2266 
2267 static int
2268 agx_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type shader,
2269                      enum pipe_shader_cap param)
2270 {
2271    bool is_no16 = agx_device(pscreen)->debug & AGX_DBG_NO16;
2272 
2273    switch (shader) {
2274    case PIPE_SHADER_VERTEX:
2275    case PIPE_SHADER_FRAGMENT:
2276    case PIPE_SHADER_COMPUTE:
2277    case PIPE_SHADER_GEOMETRY:
2278    case PIPE_SHADER_TESS_CTRL:
2279    case PIPE_SHADER_TESS_EVAL:
2280       break;
2281    default:
2282       return false;
2283    }
2284 
2285    /* this is probably not totally correct, but it's a start: */
2286    switch (param) {
2287    case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
2288    case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
2289    case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
2290    case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
2291       return 16384;
2292 
2293    case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
2294       return 1024;
2295 
2296    case PIPE_SHADER_CAP_MAX_INPUTS:
2297       return shader == PIPE_SHADER_VERTEX ? 16 : 32;
2298 
2299    case PIPE_SHADER_CAP_MAX_OUTPUTS:
2300       /* For vertex, the spec min/max is 16. We need more to handle dmat3
2301        * correctly, though. The full 32 is undesirable since it would require
2302        * shenanigans to handle.
2303        */
2304       return shader == PIPE_SHADER_FRAGMENT ? 8
2305              : shader == PIPE_SHADER_VERTEX ? 24
2306                                             : 32;
2307 
2308    case PIPE_SHADER_CAP_MAX_TEMPS:
2309       return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */
2310 
2311    case PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE:
2312       return 16 * 1024 * sizeof(float);
2313 
2314    case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
2315       return 16;
2316 
2317    case PIPE_SHADER_CAP_CONT_SUPPORTED:
2318       return 1;
2319 
2320    case PIPE_SHADER_CAP_SUBROUTINES:
2321    case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
2322       return 0;
2323 
2324    case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
2325    case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
2326    case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
2327    case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
2328    case PIPE_SHADER_CAP_INTEGERS:
2329       return true;
2330 
2331    case PIPE_SHADER_CAP_FP16:
2332    case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
2333    case PIPE_SHADER_CAP_FP16_DERIVATIVES:
2334       return !is_no16;
2335    case PIPE_SHADER_CAP_INT16:
2336       /* GLSL compiler is broken. Flip this on when Panfrost does. */
2337       return false;
2338    case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:
2339       /* This cap is broken, see 9a38dab2d18 ("zink: disable
2340        * PIPE_SHADER_CAP_FP16_CONST_BUFFERS") */
2341       return false;
2342 
2343    case PIPE_SHADER_CAP_INT64_ATOMICS:
2344    case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
2345       return 0;
2346 
2347    case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
2348       /* TODO: Enable when fully baked */
2349       if (strcmp(util_get_process_name(), "blender") == 0)
2350          return PIPE_MAX_SAMPLERS;
2351       else if (strcmp(util_get_process_name(), "run") == 0)
2352          return PIPE_MAX_SAMPLERS;
2353       else if (strcasestr(util_get_process_name(), "ryujinx") != NULL)
2354          return PIPE_MAX_SAMPLERS;
2355       else
2356          return 16;
2357 
2358    case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
2359       return PIPE_MAX_SHADER_SAMPLER_VIEWS;
2360 
2361    case PIPE_SHADER_CAP_SUPPORTED_IRS:
2362       return (1 << PIPE_SHADER_IR_NIR);
2363 
2364    case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
2365       return PIPE_MAX_SHADER_BUFFERS;
2366 
2367    case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
2368       return PIPE_MAX_SHADER_IMAGES;
2369 
2370    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
2371    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
2372       return 0;
2373 
2374    default:
2375       /* Other params are unknown */
2376       return 0;
2377    }
2378 
2379    return 0;
2380 }
2381 
2382 static int
2383 agx_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,
2384                       enum pipe_compute_cap param, void *ret)
2385 {
2386 #define RET(x)                                                                 \
2387    do {                                                                        \
2388       if (ret)                                                                 \
2389          memcpy(ret, x, sizeof(x));                                            \
2390       return sizeof(x);                                                        \
2391    } while (0)
2392 
2393    switch (param) {
2394    case PIPE_COMPUTE_CAP_ADDRESS_BITS:
2395       RET((uint32_t[]){64});
2396 
2397    case PIPE_COMPUTE_CAP_IR_TARGET:
2398       if (ret)
2399          sprintf(ret, "agx");
2400       return strlen("agx") * sizeof(char);
2401 
2402    case PIPE_COMPUTE_CAP_GRID_DIMENSION:
2403       RET((uint64_t[]){3});
2404 
2405    case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
2406       RET(((uint64_t[]){65535, 65535, 65535}));
2407 
2408    case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
2409       RET(((uint64_t[]){1024, 1024, 1024}));
2410 
2411    case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
2412       RET((uint64_t[]){1024});
2413 
2414    case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
2415    case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: {
2416       uint64_t system_memory;
2417 
2418       if (!os_get_total_physical_memory(&system_memory))
2419          return 0;
2420 
2421       RET((uint64_t[]){system_memory});
2422    }
2423 
2424    case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
2425       RET((uint64_t[]){32768});
2426 
2427    case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
2428    case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
2429       RET((uint64_t[]){4096});
2430 
2431    case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
2432       RET((uint32_t[]){800 /* MHz -- TODO */});
2433 
2434    case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
2435       RET((uint32_t[]){4 /* TODO */});
2436 
2437    case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
2438       RET((uint32_t[]){1});
2439 
2440    case PIPE_COMPUTE_CAP_SUBGROUP_SIZES:
2441       RET((uint32_t[]){32});
2442 
2443    case PIPE_COMPUTE_CAP_MAX_SUBGROUPS:
2444       RET((uint32_t[]){0 /* TODO */});
2445 
2446    case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
2447       RET((uint64_t[]){1024}); // TODO
2448    }
2449 
2450    return 0;
2451 }
2452 
2453 static bool
2454 agx_is_format_supported(struct pipe_screen *pscreen, enum pipe_format format,
2455                         enum pipe_texture_target target, unsigned sample_count,
2456                         unsigned storage_sample_count, unsigned usage)
2457 {
2458    assert(target == PIPE_BUFFER || target == PIPE_TEXTURE_1D ||
2459           target == PIPE_TEXTURE_1D_ARRAY || target == PIPE_TEXTURE_2D ||
2460           target == PIPE_TEXTURE_2D_ARRAY || target == PIPE_TEXTURE_RECT ||
2461           target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE ||
2462           target == PIPE_TEXTURE_CUBE_ARRAY);
2463 
2464    if (sample_count > 1 && sample_count != 4 && sample_count != 2)
2465       return false;
2466 
2467    if (sample_count > 1 && agx_device(pscreen)->debug & AGX_DBG_NOMSAA)
2468       return false;
2469 
2470    if (MAX2(sample_count, 1) != MAX2(storage_sample_count, 1))
2471       return false;
2472 
2473    if ((usage & PIPE_BIND_VERTEX_BUFFER) && !agx_vbo_supports_format(format))
2474       return false;
2475 
2476    /* For framebuffer_no_attachments, fake support for "none" images */
2477    if (format == PIPE_FORMAT_NONE)
2478       return true;
2479 
2480    if (usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW |
2481                 PIPE_BIND_SHADER_IMAGE)) {
2482       enum pipe_format tex_format = format;
2483 
2484       /* Mimic the fixup done in create_sampler_view and u_transfer_helper so we
2485        * advertise GL_OES_texture_stencil8. Alternatively, we could make mesa/st
2486        * less stupid?
2487        */
2488       if (tex_format == PIPE_FORMAT_X24S8_UINT)
2489          tex_format = PIPE_FORMAT_S8_UINT;
2490 
2491       struct ail_pixel_format_entry ent = ail_pixel_format[tex_format];
2492 
2493       if (!ail_is_valid_pixel_format(tex_format))
2494          return false;
2495 
2496       /* RGB32, luminance/alpha/intensity emulated for texture buffers only */
2497       if ((ent.channels == AGX_CHANNELS_R32G32B32_EMULATED ||
2498            util_format_is_luminance(tex_format) ||
2499            util_format_is_alpha(tex_format) ||
2500            util_format_is_luminance_alpha(tex_format) ||
2501            util_format_is_intensity(tex_format)) &&
2502           target != PIPE_BUFFER)
2503          return false;
2504 
2505       /* XXX: sort out rgb9e5 rendering */
2506       if ((usage & PIPE_BIND_RENDER_TARGET) &&
2507           (!ent.renderable || (tex_format == PIPE_FORMAT_R9G9B9E5_FLOAT)))
2508          return false;
2509    }
2510 
2511    if (usage & PIPE_BIND_DEPTH_STENCIL) {
2512       switch (format) {
2513       /* natively supported */
2514       case PIPE_FORMAT_Z16_UNORM:
2515       case PIPE_FORMAT_Z32_FLOAT:
2516       case PIPE_FORMAT_S8_UINT:
2517 
2518       /* lowered by u_transfer_helper to one of the above */
2519       case PIPE_FORMAT_Z24X8_UNORM:
2520       case PIPE_FORMAT_Z24_UNORM_S8_UINT:
2521       case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
2522          break;
2523 
2524       default:
2525          return false;
2526       }
2527    }
2528 
2529    return true;
2530 }
2531 
2532 static void
2533 agx_query_dmabuf_modifiers(struct pipe_screen *screen, enum pipe_format format,
2534                            int max, uint64_t *modifiers,
2535                            unsigned int *external_only, int *out_count)
2536 {
2537    int i;
2538 
2539    if (max == 0) {
2540       *out_count = ARRAY_SIZE(agx_best_modifiers);
2541       return;
2542    }
2543 
2544    for (i = 0; i < ARRAY_SIZE(agx_best_modifiers) && i < max; i++) {
2545       if (external_only)
2546          external_only[i] = 0;
2547 
2548       modifiers[i] = agx_best_modifiers[i];
2549    }
2550 
2551    /* Return the number of modifiers copied */
2552    *out_count = i;
2553 }
2554 
2555 static bool
2556 agx_is_dmabuf_modifier_supported(struct pipe_screen *screen, uint64_t modifier,
2557                                  enum pipe_format format, bool *external_only)
2558 {
2559    if (external_only)
2560       *external_only = false;
2561 
2562    for (unsigned i = 0; i < ARRAY_SIZE(agx_best_modifiers); ++i) {
2563       if (agx_best_modifiers[i] == modifier)
2564          return true;
2565    }
2566 
2567    return false;
2568 }
2569 
2570 static void
2571 agx_destroy_screen(struct pipe_screen *pscreen)
2572 {
2573    struct agx_screen *screen = agx_screen(pscreen);
2574 
2575    drmSyncobjDestroy(screen->dev.fd, screen->flush_syncobj);
2576 
2577    if (screen->dev.ro)
2578       screen->dev.ro->destroy(screen->dev.ro);
2579 
2580    u_transfer_helper_destroy(pscreen->transfer_helper);
2581    agx_close_device(&screen->dev);
2582    disk_cache_destroy(screen->disk_cache);
2583    ralloc_free(screen);
2584 }
2585 
2586 static const void *
2587 agx_get_compiler_options(struct pipe_screen *pscreen, enum pipe_shader_ir ir,
2588                          enum pipe_shader_type shader)
2589 {
2590    return &agx_nir_options;
2591 }
2592 
2593 static void
2594 agx_resource_set_stencil(struct pipe_resource *prsrc,
2595                          struct pipe_resource *stencil)
2596 {
2597    agx_resource(prsrc)->separate_stencil = agx_resource(stencil);
2598 }
2599 
2600 static struct pipe_resource *
2601 agx_resource_get_stencil(struct pipe_resource *prsrc)
2602 {
2603    return (struct pipe_resource *)agx_resource(prsrc)->separate_stencil;
2604 }
2605 
2606 static enum pipe_format
2607 agx_resource_get_internal_format(struct pipe_resource *prsrc)
2608 {
2609    return agx_resource(prsrc)->layout.format;
2610 }
2611 
2612 static struct disk_cache *
2613 agx_get_disk_shader_cache(struct pipe_screen *pscreen)
2614 {
2615    return agx_screen(pscreen)->disk_cache;
2616 }
2617 
2618 static const struct u_transfer_vtbl transfer_vtbl = {
2619    .resource_create = agx_resource_create,
2620    .resource_destroy = agx_resource_destroy,
2621    .transfer_map = agx_transfer_map,
2622    .transfer_unmap = agx_transfer_unmap,
2623    .transfer_flush_region = agx_transfer_flush_region,
2624    .get_internal_format = agx_resource_get_internal_format,
2625    .set_stencil = agx_resource_set_stencil,
2626    .get_stencil = agx_resource_get_stencil,
2627 };
2628 
2629 static int
2630 agx_screen_get_fd(struct pipe_screen *pscreen)
2631 {
2632    return agx_device(pscreen)->fd;
2633 }
2634 
2635 static uint64_t
2636 agx_get_timestamp(struct pipe_screen *pscreen)
2637 {
2638    struct agx_device *dev = agx_device(pscreen);
2639    return agx_gpu_time_to_ns(dev, agx_get_gpu_timestamp(dev));
2640 }
2641 
2642 static void
2643 agx_screen_get_device_uuid(struct pipe_screen *pscreen, char *uuid)
2644 {
2645    agx_get_device_uuid(agx_device(pscreen), uuid);
2646 }
2647 
2648 static void
2649 agx_screen_get_driver_uuid(struct pipe_screen *pscreen, char *uuid)
2650 {
2651    agx_get_driver_uuid(uuid);
2652 }
2653 
2654 struct pipe_screen *
2655 agx_screen_create(int fd, struct renderonly *ro,
2656                   const struct pipe_screen_config *config)
2657 {
2658    struct agx_screen *agx_screen;
2659    struct pipe_screen *screen;
2660 
2661    /* Refuse to probe. There is no stable UAPI yet. Upstream Mesa cannot be used
2662     * yet with Asahi. Do not try. Do not patch out this check. Do not teach
2663     * others about patching this check. Do not distribute upstream Mesa with
2664     * this check patched out.
2665     */
2666    return NULL;
2667 
2668    agx_screen = rzalloc(NULL, struct agx_screen);
2669    if (!agx_screen)
2670       return NULL;
2671 
2672    screen = &agx_screen->pscreen;
2673 
2674    /* parse driconf configuration now for device specific overrides */
2675    driParseConfigFiles(config->options, config->options_info, 0, "asahi", NULL,
2676                        NULL, NULL, 0, NULL, 0);
2677 
2678    /* Forward no16 flag from driconf */
2679    if (driQueryOptionb(config->options, "no_fp16"))
2680       agx_screen->dev.debug |= AGX_DBG_NO16;
2681 
2682    agx_screen->dev.fd = fd;
2683    agx_screen->dev.ro = ro;
2684    u_rwlock_init(&agx_screen->destroy_lock);
2685 
2686    /* Try to open an AGX device */
2687    if (!agx_open_device(agx_screen, &agx_screen->dev)) {
2688       ralloc_free(agx_screen);
2689       return NULL;
2690    }
2691 
2692    int ret =
2693       drmSyncobjCreate(agx_device(screen)->fd, 0, &agx_screen->flush_syncobj);
2694    assert(!ret);
2695 
2696    simple_mtx_init(&agx_screen->flush_seqid_lock, mtx_plain);
2697 
2698    screen->destroy = agx_destroy_screen;
2699    screen->get_screen_fd = agx_screen_get_fd;
2700    screen->get_name = agx_get_name;
2701    screen->get_vendor = agx_get_vendor;
2702    screen->get_device_vendor = agx_get_device_vendor;
2703    screen->get_param = agx_get_param;
2704    screen->get_shader_param = agx_get_shader_param;
2705    screen->get_compute_param = agx_get_compute_param;
2706    screen->get_paramf = agx_get_paramf;
2707    screen->get_device_uuid = agx_screen_get_device_uuid;
2708    screen->get_driver_uuid = agx_screen_get_driver_uuid;
2709    screen->is_format_supported = agx_is_format_supported;
2710    screen->query_dmabuf_modifiers = agx_query_dmabuf_modifiers;
2711    screen->query_memory_info = agx_query_memory_info;
2712    screen->is_dmabuf_modifier_supported = agx_is_dmabuf_modifier_supported;
2713    screen->context_create = agx_create_context;
2714    screen->resource_from_handle = agx_resource_from_handle;
2715    screen->resource_get_handle = agx_resource_get_handle;
2716    screen->resource_get_param = agx_resource_get_param;
2717    screen->resource_create_with_modifiers = agx_resource_create_with_modifiers;
2718    screen->get_timestamp = agx_get_timestamp;
2719    screen->fence_reference = agx_fence_reference;
2720    screen->fence_finish = agx_fence_finish;
2721    screen->fence_get_fd = agx_fence_get_fd;
2722    screen->get_compiler_options = agx_get_compiler_options;
2723    screen->get_disk_shader_cache = agx_get_disk_shader_cache;
2724 
2725    screen->resource_create = u_transfer_helper_resource_create;
2726    screen->resource_destroy = u_transfer_helper_resource_destroy;
2727    screen->transfer_helper = u_transfer_helper_create(
2728       &transfer_vtbl,
2729       U_TRANSFER_HELPER_SEPARATE_Z32S8 | U_TRANSFER_HELPER_SEPARATE_STENCIL |
2730          U_TRANSFER_HELPER_MSAA_MAP | U_TRANSFER_HELPER_Z24_IN_Z32F);
2731 
2732    agx_disk_cache_init(agx_screen);
2733 
2734    return screen;
2735 }
2736