xref: /aosp_15_r20/external/mesa3d/src/broadcom/simulator/v3dx_simulator.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2014-2017 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /**
25  * @file v3dx_simulator.c
26  *
 * Implements the actual HW interaction between the GL driver's V3D simulator and the simulator.
28  *
29  * The register headers between V3D versions will have conflicting defines, so
30  * all register interactions appear in this file and are compiled per V3D version
31  * we support.
32  */
33 
34 #if USE_V3D_SIMULATOR
35 
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#include "v3d_simulator.h"
#include "v3d_simulator_wrapper.h"

#include "common/v3d_performance_counters.h"

#include "util/macros.h"
#include "util/bitscan.h"
#include "drm-uapi/v3d_drm.h"
48 
49 #define HW_REGISTER_RO(x) (x)
50 #define HW_REGISTER_RW(x) (x)
51 #if V3D_VERSION == 71
52 #include "libs/core/v3d/registers/7.1.7.0/v3d.h"
53 #else
54 #if V3D_VERSION == 42
55 #include "libs/core/v3d/registers/4.2.14.0/v3d.h"
56 #endif
57 #endif
58 
59 #define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val)
60 #define V3D_READ(reg) v3d_hw_read_reg(v3d, reg)
61 
62 /* Invalidates the L2C cache.  This is a read-only cache for uniforms and instructions. */
63 static void
v3d_invalidate_l2c(struct v3d_hw * v3d)64 v3d_invalidate_l2c(struct v3d_hw *v3d)
65 {
66         if (V3D_VERSION >= 33)
67                 return;
68 
69         V3D_WRITE(V3D_CTL_0_L2CACTL,
70                   V3D_CTL_0_L2CACTL_L2CCLR_SET |
71                   V3D_CTL_0_L2CACTL_L2CENA_SET);
72 }
73 
74 enum v3d_l2t_cache_flush_mode {
75         V3D_CACHE_FLUSH_MODE_FLUSH,
76         V3D_CACHE_FLUSH_MODE_CLEAR,
77         V3D_CACHE_FLUSH_MODE_CLEAN,
78 };
79 
80 /* Invalidates texture L2 cachelines */
81 static void
v3d_invalidate_l2t(struct v3d_hw * v3d)82 v3d_invalidate_l2t(struct v3d_hw *v3d)
83 {
84         V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
85         V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
86         V3D_WRITE(V3D_CTL_0_L2TCACTL,
87                   V3D_CTL_0_L2TCACTL_L2TFLS_SET |
88                   (V3D_CACHE_FLUSH_MODE_FLUSH << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
89 }
90 
91 /*
92  * Wait for l2tcactl, used for flushes.
93  *
94  * FIXME: for a multicore scenario we should pass here the core. All wrapper
95  * assumes just one core, so would be better to handle that on that case.
96  */
v3d_core_wait_l2tcactl(struct v3d_hw * v3d,uint32_t ctrl)97 static UNUSED void v3d_core_wait_l2tcactl(struct v3d_hw *v3d,
98                                           uint32_t ctrl)
99 {
100    assert(!(ctrl & ~(V3D_CTL_0_L2TCACTL_TMUWCF_SET | V3D_CTL_0_L2TCACTL_L2TFLS_SET)));
101 
102    while (V3D_READ(V3D_CTL_0_L2TCACTL) & ctrl) {
103            v3d_hw_tick(v3d);
104    }
105 }
106 
107 /* Flushes dirty texture cachelines from the L1 write combiner */
108 static void
v3d_flush_l1td(struct v3d_hw * v3d)109 v3d_flush_l1td(struct v3d_hw *v3d)
110 {
111         V3D_WRITE(V3D_CTL_0_L2TCACTL,
112                   V3D_CTL_0_L2TCACTL_TMUWCF_SET);
113 
114         /* Note: here the kernel (and previous versions of the simulator
115          * wrapper) is using V3D_CTL_0_L2TCACTL_L2TFLS_SET, as with l2t. We
116          * understand that it makes more sense to do like this. We need to
117          * confirm which one is doing it correctly. So far things work fine on
118          * the simulator this way.
119          */
120         v3d_core_wait_l2tcactl(v3d, V3D_CTL_0_L2TCACTL_TMUWCF_SET);
121 }
122 
123 /* Flushes dirty texture L2 cachelines */
124 static void
v3d_flush_l2t(struct v3d_hw * v3d)125 v3d_flush_l2t(struct v3d_hw *v3d)
126 {
127         V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
128         V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
129         V3D_WRITE(V3D_CTL_0_L2TCACTL,
130                   V3D_CTL_0_L2TCACTL_L2TFLS_SET |
131                   (V3D_CACHE_FLUSH_MODE_CLEAN << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
132 
133         v3d_core_wait_l2tcactl(v3d, V3D_CTL_0_L2TCACTL_L2TFLS_SET);
134 }
135 
136 /* Invalidates the slice caches.  These are read-only caches. */
137 static void
v3d_invalidate_slices(struct v3d_hw * v3d)138 v3d_invalidate_slices(struct v3d_hw *v3d)
139 {
140         V3D_WRITE(V3D_CTL_0_SLCACTL, ~0);
141 }
142 
143 static void
v3d_invalidate_caches(struct v3d_hw * v3d)144 v3d_invalidate_caches(struct v3d_hw *v3d)
145 {
146         v3d_invalidate_l2c(v3d);
147         v3d_invalidate_l2t(v3d);
148         v3d_invalidate_slices(v3d);
149 }
150 
151 static uint32_t g_gmp_ofs;
152 static void
v3d_reload_gmp(struct v3d_hw * v3d)153 v3d_reload_gmp(struct v3d_hw *v3d)
154 {
155         /* Completely reset the GMP. */
156         V3D_WRITE(V3D_GMP_CFG,
157                   V3D_GMP_CFG_PROTENABLE_SET);
158         V3D_WRITE(V3D_GMP_TABLE_ADDR, g_gmp_ofs);
159         V3D_WRITE(V3D_GMP_CLEAR_LOAD, ~0);
160         while (V3D_READ(V3D_GMP_STATUS) &
161                V3D_GMP_STATUS_CFG_BUSY_SET) {
162                 ;
163         }
164 }
165 
166 static UNUSED void
v3d_flush_caches(struct v3d_hw * v3d)167 v3d_flush_caches(struct v3d_hw *v3d)
168 {
169         v3d_flush_l1td(v3d);
170         v3d_flush_l2t(v3d);
171 }
172 
173 #if V3D_VERSION < 71
174 #define TFU_REG(NAME) V3D_TFU_ ## NAME
175 #else
176 #define TFU_REG(NAME) V3D_IFC_ ## NAME
177 #endif
178 
179 
180 int
v3dX(simulator_submit_tfu_ioctl)181 v3dX(simulator_submit_tfu_ioctl)(struct v3d_hw *v3d,
182                                  struct drm_v3d_submit_tfu *args)
183 {
184         int last_vtct = V3D_READ(TFU_REG(CS)) & TFU_REG(CS_CVTCT_SET);
185 
186         V3D_WRITE(TFU_REG(IIA), args->iia);
187         V3D_WRITE(TFU_REG(IIS), args->iis);
188         V3D_WRITE(TFU_REG(ICA), args->ica);
189         V3D_WRITE(TFU_REG(IUA), args->iua);
190         V3D_WRITE(TFU_REG(IOA), args->ioa);
191 #if V3D_VERSION >= 71
192         V3D_WRITE(TFU_REG(IOC), args->v71.ioc);
193 #endif
194         V3D_WRITE(TFU_REG(IOS), args->ios);
195         V3D_WRITE(TFU_REG(COEF0), args->coef[0]);
196         V3D_WRITE(TFU_REG(COEF1), args->coef[1]);
197         V3D_WRITE(TFU_REG(COEF2), args->coef[2]);
198         V3D_WRITE(TFU_REG(COEF3), args->coef[3]);
199 
200         V3D_WRITE(TFU_REG(ICFG), args->icfg);
201 
202         while ((V3D_READ(TFU_REG(CS)) & TFU_REG(CS_CVTCT_SET)) == last_vtct) {
203                 v3d_hw_tick(v3d);
204         }
205 
206         return 0;
207 }
208 
209 int
v3dX(simulator_submit_csd_ioctl)210 v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d,
211                                  struct drm_v3d_submit_csd *args,
212                                  uint32_t gmp_ofs)
213 {
214 #if V3D_VERSION >= 42
215         int last_completed_jobs = (V3D_READ(V3D_CSD_0_STATUS) &
216                                    V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET);
217         g_gmp_ofs = gmp_ofs;
218         v3d_reload_gmp(v3d);
219 
220         v3d_invalidate_caches(v3d);
221 
222         V3D_WRITE(V3D_CSD_0_QUEUED_CFG1, args->cfg[1]);
223         V3D_WRITE(V3D_CSD_0_QUEUED_CFG2, args->cfg[2]);
224         V3D_WRITE(V3D_CSD_0_QUEUED_CFG3, args->cfg[3]);
225         V3D_WRITE(V3D_CSD_0_QUEUED_CFG4, args->cfg[4]);
226         V3D_WRITE(V3D_CSD_0_QUEUED_CFG5, args->cfg[5]);
227         V3D_WRITE(V3D_CSD_0_QUEUED_CFG6, args->cfg[6]);
228 #if V3D_VERSION >= 71
229         V3D_WRITE(V3D_CSD_0_QUEUED_CFG7, 0);
230 #endif
231         /* CFG0 kicks off the job */
232         V3D_WRITE(V3D_CSD_0_QUEUED_CFG0, args->cfg[0]);
233 
234         /* Now we wait for the dispatch to finish. The safest way is to check
235          * if NUM_COMPLETED_JOBS has increased. Note that in spite of that
236          * name that register field is about the number of completed
237          * dispatches.
238          */
239         while ((V3D_READ(V3D_CSD_0_STATUS) &
240                 V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET) == last_completed_jobs) {
241                 v3d_hw_tick(v3d);
242         }
243 
244         v3d_flush_caches(v3d);
245 
246         return 0;
247 #else
248         return -1;
249 #endif
250 }
251 
252 int
v3dX(simulator_get_param_ioctl)253 v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
254                                 uint32_t perfcnt_total,
255                                 struct drm_v3d_get_param *args)
256 {
257         static const uint32_t reg_map[] = {
258 #if V3D_VERSION >= 71
259                 [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_IDENT0,
260 #else
261                 [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG,
262 #endif
263                 [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1,
264                 [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2,
265                 [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3,
266                 [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_0_IDENT0,
267                 [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_0_IDENT1,
268                 [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2,
269         };
270 
271         switch (args->param) {
272         case DRM_V3D_PARAM_SUPPORTS_TFU:
273                 args->value = 1;
274                 return 0;
275         case DRM_V3D_PARAM_SUPPORTS_CSD:
276                 args->value = V3D_VERSION >= 42;
277                 return 0;
278         case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH:
279                 args->value = 1;
280                 return 0;
281         case DRM_V3D_PARAM_SUPPORTS_PERFMON:
282                 args->value = V3D_VERSION >= 42;
283                 return 0;
284         case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT:
285                 args->value = 1;
286                 return 0;
287 	case DRM_V3D_PARAM_SUPPORTS_CPU_QUEUE:
288 		args->value = 1;
289 		return 0;
290 	case DRM_V3D_PARAM_MAX_PERF_COUNTERS:
291 		args->value = perfcnt_total;
292 		return 0;
293         }
294 
295         if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) {
296                 args->value = V3D_READ(reg_map[args->param]);
297                 return 0;
298         }
299 
300         fprintf(stderr, "Unknown DRM_IOCTL_V3D_GET_PARAM(%lld)\n",
301                 (long long)args->value);
302         abort();
303 }
304 
305 int
v3dX(simulator_perfmon_get_counter_ioctl)306 v3dX(simulator_perfmon_get_counter_ioctl)(uint32_t perfcnt_total,
307                                           struct drm_v3d_perfmon_get_counter *args)
308 {
309         const char **counter = NULL;
310 
311         /* Make sure that the counter ID is valid */
312         if (args->counter >= perfcnt_total)
313                 return -1;
314 
315         counter = v3d_performance_counters[args->counter];
316 
317         memcpy(args->name, counter[V3D_PERFCNT_NAME],
318                DRM_V3D_PERFCNT_MAX_NAME);
319 
320         memcpy(args->category, counter[V3D_PERFCNT_CATEGORY],
321                DRM_V3D_PERFCNT_MAX_CATEGORY);
322 
323         memcpy(args->description, counter[V3D_PERFCNT_DESCRIPTION],
324                DRM_V3D_PERFCNT_MAX_DESCRIPTION);
325 
326         return 0;
327 }
328 
329 static struct v3d_hw *v3d_isr_hw;
330 
331 
332 static void
v3d_isr_core(struct v3d_hw * v3d,unsigned core)333 v3d_isr_core(struct v3d_hw *v3d,
334              unsigned core)
335 {
336         /* FIXME: so far we are assuming just one core, and using only the _0_
337          * registers. If we add multiple-core on the simulator, we would need
338          * to pass core as a parameter, and chose the proper registers.
339          */
340         assert(core == 0);
341         uint32_t core_status = V3D_READ(V3D_CTL_0_INT_STS);
342         V3D_WRITE(V3D_CTL_0_INT_CLR, core_status);
343 
344         if (core_status & V3D_CTL_0_INT_STS_INT_OUTOMEM_SET) {
345                 uint32_t size = 256 * 1024;
346                 uint32_t offset = v3d_simulator_get_spill(size);
347 
348                 v3d_reload_gmp(v3d);
349 
350                 V3D_WRITE(V3D_PTB_0_BPOA, offset);
351                 V3D_WRITE(V3D_PTB_0_BPOS, size);
352                 return;
353         }
354 
355 #if V3D_VERSION <= 42
356         if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) {
357                 fprintf(stderr, "GMP violation at 0x%08x\n",
358                         V3D_READ(V3D_GMP_VIO_ADDR));
359         } else {
360                 fprintf(stderr,
361                         "Unexpected ISR with core status 0x%08x\n",
362                         core_status);
363         }
364         abort();
365 #endif
366 }
367 
368 static void
handle_mmu_interruptions(struct v3d_hw * v3d,uint32_t hub_status)369 handle_mmu_interruptions(struct v3d_hw *v3d,
370                          uint32_t hub_status)
371 {
372         bool wrv = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_WRV_SET;
373         bool pti = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_PTI_SET;
374         bool cap = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET;
375 
376         if (!(pti || cap || wrv))
377                 return;
378 
379         const char *client = "?";
380         uint32_t axi_id = V3D_READ(V3D_MMU0_VIO_ID);
381         uint32_t va_width = 30;
382 
383         static const char *const v3d42_axi_ids[] = {
384                 "L2T",
385                 "PTB",
386                 "PSE",
387                 "TLB",
388                 "CLE",
389                 "TFU",
390                 "MMU",
391                 "GMP",
392         };
393 
394         axi_id = axi_id >> 5;
395         if (axi_id < ARRAY_SIZE(v3d42_axi_ids))
396                 client = v3d42_axi_ids[axi_id];
397 
398         uint32_t mmu_debug = V3D_READ(V3D_MMU0_DEBUG_INFO);
399 
400         va_width += ((mmu_debug & V3D_MMU0_DEBUG_INFO_VA_WIDTH_SET)
401                      >> V3D_MMU0_DEBUG_INFO_VA_WIDTH_LSB);
402 
403         /* Only the top bits (final number depends on the gen) of the virtual
404          * address are reported in the MMU VIO_ADDR register.
405          */
406         uint64_t vio_addr = ((uint64_t)V3D_READ(V3D_MMU0_VIO_ADDR) <<
407                              (va_width - 32));
408 
409         /* Difference with the kernel: here were are going to abort after
410          * logging, so we don't bother with some stuff that the kernel does,
411          * like restoring the MMU ctrl bits
412          */
413 
414         fprintf(stderr, "MMU error from client %s (%d) at 0x%llx%s%s%s\n",
415                 client, axi_id, (long long) vio_addr,
416                 wrv ? ", write violation" : "",
417                 pti ? ", pte invalid" : "",
418                 cap ? ", cap exceeded" : "");
419 
420         abort();
421 }
422 
423 static void
v3d_isr_hub(struct v3d_hw * v3d)424 v3d_isr_hub(struct v3d_hw *v3d)
425 {
426         uint32_t hub_status = V3D_READ(V3D_HUB_CTL_INT_STS);
427 
428         /* Acknowledge the interrupts we're handling here */
429         V3D_WRITE(V3D_HUB_CTL_INT_CLR, hub_status);
430 
431         if (hub_status & V3D_HUB_CTL_INT_STS_INT_TFUC_SET) {
432                 /* FIXME: we were not able to raise this exception. We let the
433                  * unreachable here, so we could get one if it is raised on
434                  * the future. In any case, note that for this case we would
435                  * only be doing debugging log.
436                  */
437                 unreachable("TFU Conversion Complete interrupt not handled");
438         }
439 
440         handle_mmu_interruptions(v3d, hub_status);
441 
442 #if V3D_VERSION == 71
443         if (hub_status & V3D_HUB_CTL_INT_STS_INT_GMPV_SET) {
444                 fprintf(stderr, "GMP violation at 0x%08x\n",
445                         V3D_READ(V3D_GMP_VIO_ADDR));
446         } else {
447                 fprintf(stderr,
448                         "Unexpected ISR with status 0x%08x\n",
449                         hub_status);
450         }
451         abort();
452 #endif
453 }
454 
455 static void
v3d_isr(uint32_t hub_status)456 v3d_isr(uint32_t hub_status)
457 {
458         struct v3d_hw *v3d = v3d_isr_hw;
459         uint32_t mask = hub_status;
460 
461         /* Check the hub_status bits */
462         while (mask) {
463                 unsigned core = u_bit_scan(&mask);
464 
465                 if (core == v3d_hw_get_hub_core())
466                         v3d_isr_hub(v3d);
467                 else
468                         v3d_isr_core(v3d, core);
469         }
470 
471         return;
472 }
473 
474 void
v3dX(simulator_init_regs)475 v3dX(simulator_init_regs)(struct v3d_hw *v3d)
476 {
477         /* FIXME: the kernel captures some additional core interrupts here,
478          * for tracing. Perhaps we should evaluate to do the same here and add
479          * some debug options.
480          */
481         uint32_t core_interrupts = V3D_CTL_0_INT_STS_INT_OUTOMEM_SET;
482 #if V3D_VERSION <= 42
483         core_interrupts |= V3D_CTL_0_INT_STS_INT_GMPV_SET;
484 #endif
485 
486         V3D_WRITE(V3D_CTL_0_INT_MSK_SET, ~core_interrupts);
487         V3D_WRITE(V3D_CTL_0_INT_MSK_CLR, core_interrupts);
488 
489         uint32_t hub_interrupts =
490            (V3D_HUB_CTL_INT_STS_INT_MMU_WRV_SET |  /* write violation */
491             V3D_HUB_CTL_INT_STS_INT_MMU_PTI_SET |  /* page table invalid */
492             V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET |  /* CAP exceeded */
493             V3D_HUB_CTL_INT_STS_INT_TFUC_SET); /* TFU conversion */
494 
495 #if V3D_VERSION == 71
496         hub_interrupts |= V3D_HUB_CTL_INT_STS_INT_GMPV_SET;
497 #endif
498         V3D_WRITE(V3D_HUB_CTL_INT_MSK_SET, ~hub_interrupts);
499         V3D_WRITE(V3D_HUB_CTL_INT_MSK_CLR, hub_interrupts);
500 
501         v3d_isr_hw = v3d;
502         v3d_hw_set_isr(v3d, v3d_isr);
503 }
504 
505 void
v3dX(simulator_submit_cl_ioctl)506 v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d,
507                                 struct drm_v3d_submit_cl *submit,
508                                 uint32_t gmp_ofs)
509 {
510         int last_bfc = (V3D_READ(V3D_CLE_0_BFC) &
511                         V3D_CLE_0_BFC_BMFCT_SET);
512 
513         int last_rfc = (V3D_READ(V3D_CLE_0_RFC) &
514                         V3D_CLE_0_RFC_RMFCT_SET);
515 
516         g_gmp_ofs = gmp_ofs;
517         v3d_reload_gmp(v3d);
518 
519         v3d_invalidate_caches(v3d);
520 
521         if (submit->qma) {
522                 V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma);
523                 V3D_WRITE(V3D_CLE_0_CT0QMS, submit->qms);
524         }
525         if (submit->qts) {
526                 V3D_WRITE(V3D_CLE_0_CT0QTS,
527                           V3D_CLE_0_CT0QTS_CTQTSEN_SET |
528                           submit->qts);
529         }
530         V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start);
531         V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end);
532 
533         /* Wait for bin to complete before firing render.  The kernel's
534          * scheduler implements this using the GPU scheduler blocking on the
535          * bin fence completing.  (We don't use HW semaphores).
536          */
537         while ((V3D_READ(V3D_CLE_0_BFC) &
538                 V3D_CLE_0_BFC_BMFCT_SET) == last_bfc) {
539                 v3d_hw_tick(v3d);
540         }
541 
542         v3d_invalidate_caches(v3d);
543 
544         V3D_WRITE(V3D_CLE_0_CT1QBA, submit->rcl_start);
545         V3D_WRITE(V3D_CLE_0_CT1QEA, submit->rcl_end);
546 
547         while ((V3D_READ(V3D_CLE_0_RFC) &
548                 V3D_CLE_0_RFC_RMFCT_SET) == last_rfc) {
549                 v3d_hw_tick(v3d);
550         }
551 }
552 
553 #define V3D_PCTR_0_PCTR_N(x) (V3D_PCTR_0_PCTR0 + 4 * (x))
554 #define V3D_PCTR_0_SRC_N(x) (V3D_PCTR_0_SRC_0_3 + 4 * (x))
555 #define V3D_PCTR_0_SRC_N_SHIFT(x) ((x) * 8)
556 #define V3D_PCTR_0_SRC_N_MASK(x) (BITFIELD_RANGE(V3D_PCTR_0_SRC_N_SHIFT(x), \
557                                                  V3D_PCTR_0_SRC_N_SHIFT(x) + \
558                                                  V3D_PCTR_0_SRC_0_3_PCTRS0_MSB))
559 
560 void
v3dX(simulator_perfmon_start)561 v3dX(simulator_perfmon_start)(struct v3d_hw *v3d,
562                               uint32_t ncounters,
563                               uint8_t *events)
564 {
565         int i, j;
566         uint32_t source;
567         uint32_t mask = BITFIELD_RANGE(0, ncounters);
568 
569         for (i = 0; i < ncounters; i+=4) {
570                 source = i / 4;
571                 uint32_t channels = 0;
572                 for (j = 0; j < 4 && (i + j) < ncounters; j++)
573                         channels |= events[i + j] << V3D_PCTR_0_SRC_N_SHIFT(j);
574                 V3D_WRITE(V3D_PCTR_0_SRC_N(source), channels);
575         }
576         V3D_WRITE(V3D_PCTR_0_CLR, mask);
577         V3D_WRITE(V3D_PCTR_0_OVERFLOW, mask);
578         V3D_WRITE(V3D_PCTR_0_EN, mask);
579 }
580 
v3dX(simulator_perfmon_stop)581 void v3dX(simulator_perfmon_stop)(struct v3d_hw *v3d,
582                                   uint32_t ncounters,
583                                   uint64_t *values)
584 {
585         int i;
586 
587         for (i = 0; i < ncounters; i++)
588                 values[i] += V3D_READ(V3D_PCTR_0_PCTR_N(i));
589 
590         V3D_WRITE(V3D_PCTR_0_EN, 0);
591 }
592 
v3dX(simulator_get_perfcnt_total)593 void v3dX(simulator_get_perfcnt_total)(uint32_t *count)
594 {
595         *count = ARRAY_SIZE(v3d_performance_counters);
596 }
597 
598 #endif /* USE_V3D_SIMULATOR */
599