/*
 * Copyright (C) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Alyssa Rosenzweig <[email protected]>
 */

#include "util/macros.h"

#include "kmod/pan_kmod.h"
#include "panfrost/util/pan_ir.h"
#include "pan_props.h"

#include <genxml/gen_macros.h>

/* Fixed "minimum revisions" */
#define NO_ANISO  (~0)
#define HAS_ANISO (0)

#define MODEL(gpu_id_, gpu_variant_, shortname, counters_,                    \
              min_rev_anisotropic_, tib_size_, quirks_)                       \
   {                                                                          \
      .gpu_id = gpu_id_, .gpu_variant = gpu_variant_,                         \
      .name = "Mali-" shortname " (Panfrost)",                                \
      .performance_counters = counters_,                                      \
      .min_rev_anisotropic = min_rev_anisotropic_,                            \
      .tilebuffer_size = tib_size_, .quirks = quirks_,                        \
   }
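
/* For illustration only: assuming the panfrost_model layout implied by the
 * designated initializers above, the first table entry below expands to
 * roughly
 *
 *    { .gpu_id = 0x600, .gpu_variant = 0,
 *      .name = "Mali-T600 (Panfrost)", .performance_counters = "T60x",
 *      .min_rev_anisotropic = NO_ANISO, .tilebuffer_size = 8192,
 *      .quirks = {} }
 */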

/* Table of supported Mali GPUs */
/* clang-format off */
const struct panfrost_model panfrost_model_list[] = {
   MODEL(0x600, 0, "T600", "T60x", NO_ANISO, 8192, {}),
   MODEL(0x620, 0, "T620", "T62x", NO_ANISO, 8192, {}),
   MODEL(0x720, 0, "T720", "T72x", NO_ANISO, 8192, { .no_hierarchical_tiling = true }),
   MODEL(0x750, 0, "T760", "T76x", NO_ANISO, 8192, {}),
   MODEL(0x820, 0, "T820", "T82x", NO_ANISO, 8192, { .no_hierarchical_tiling = true }),
   MODEL(0x830, 0, "T830", "T83x", NO_ANISO, 8192, { .no_hierarchical_tiling = true }),
   MODEL(0x860, 0, "T860", "T86x", NO_ANISO, 8192, {}),
   MODEL(0x880, 0, "T880", "T88x", NO_ANISO, 8192, {}),

   MODEL(0x6000, 0, "G71", "TMIx", NO_ANISO, 8192, {}),
   MODEL(0x6221, 0, "G72", "THEx", 0x0030 /* r0p3 */, 16384, {}),
   MODEL(0x7090, 0, "G51", "TSIx", 0x1010 /* r1p1 */, 16384, {}),
   MODEL(0x7093, 0, "G31", "TDVx", HAS_ANISO, 16384, {}),
   MODEL(0x7211, 0, "G76", "TNOx", HAS_ANISO, 16384, {}),
   MODEL(0x7212, 0, "G52", "TGOx", HAS_ANISO, 16384, {}),
   MODEL(0x7402, 0, "G52 r1", "TGOx", HAS_ANISO, 16384, {}),
   MODEL(0x9091, 0, "G57", "TNAx", HAS_ANISO, 16384, {}),
   MODEL(0x9093, 0, "G57", "TNAx", HAS_ANISO, 16384, {}),

   MODEL(0xa867, 0, "G610", "TVIx", HAS_ANISO, 32768, {}),
   MODEL(0xac74, 0, "G310", "TVAx", HAS_ANISO, 16384, {}),
   MODEL(0xac74, 1, "G310", "TVAx", HAS_ANISO, 16384, {}),
   MODEL(0xac74, 2, "G310", "TVAx", HAS_ANISO, 16384, {}),
   MODEL(0xac74, 3, "G310", "TVAx", HAS_ANISO, 32768, {}),
   MODEL(0xac74, 4, "G310", "TVAx", HAS_ANISO, 32768, {}),
};
/* clang-format on */

#undef NO_ANISO
#undef HAS_ANISO
#undef MODEL

/*
 * Look up a supported model by its GPU ID, or return NULL if the model is not
 * supported at this time.
 */
const struct panfrost_model *
panfrost_get_model(uint32_t gpu_id, uint32_t gpu_variant)
{
   for (unsigned i = 0; i < ARRAY_SIZE(panfrost_model_list); ++i) {
      if (panfrost_model_list[i].gpu_id == gpu_id &&
          panfrost_model_list[i].gpu_variant == gpu_variant)
         return &panfrost_model_list[i];
   }

   return NULL;
}
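
/* Usage sketch (values for illustration): looking up a Mali-G52 as probed
 * from the kernel would be
 *
 *    const struct panfrost_model *model = panfrost_get_model(0x7212, 0);
 *
 * which returns the TGOx entry above, or NULL for an unsupported GPU ID.
 */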

unsigned
panfrost_query_l2_slices(const struct pan_kmod_dev_props *props)
{
   /* L2_SLICES is MEM_FEATURES[11:8]; the field encodes the slice count
    * minus 1 */
   return ((props->mem_features >> 8) & 0xF) + 1;
}
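
/* For example (hypothetical value): mem_features = 0x100 has bits [11:8]
 * equal to 1, so the GPU reports 1 + 1 = 2 L2 cache slices.
 */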

struct panfrost_tiler_features
panfrost_query_tiler_features(const struct pan_kmod_dev_props *props)
{
   /* Default value (2^9 bytes and 8 levels) to match old behaviour */
   uint32_t raw = props->tiler_features;

   /* Bin size is log2 in the first byte, max levels in the second byte */
   return (struct panfrost_tiler_features){
      .bin_size = (1 << (raw & BITFIELD_MASK(5))),
      .max_levels = (raw >> 8) & BITFIELD_MASK(4),
   };
}
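
/* Worked example (hypothetical value): raw = 0x809 decodes as
 *
 *    bin_size   = 1 << (0x809 & 0x1f)  = 1 << 9 = 512 bytes
 *    max_levels = (0x809 >> 8) & 0xf   = 8 hierarchy levels
 *
 * which is exactly the default (2^9 bytes, 8 levels) mentioned above.
 */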

unsigned
panfrost_query_core_count(const struct pan_kmod_dev_props *props,
                          unsigned *core_id_range)
{
   unsigned mask = props->shader_present;

   /* Some cores might be absent. In some cases, we care about the range of
    * core IDs (that is, the greatest core ID + 1). If the core mask is
    * contiguous, this equals the core count.
    */
   *core_id_range = util_last_bit(mask);

   /* The actual core count skips over the gaps */
   return util_bitcount(mask);
}
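
/* For instance (hypothetical mask): shader_present = 0xb (binary 1011) has a
 * gap at core 2, so util_bitcount() gives a core count of 3 while
 * util_last_bit() gives a core ID range of 4.
 */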

unsigned
panfrost_query_thread_tls_alloc(const struct pan_kmod_dev_props *props)
{
   return props->max_tls_instance_per_core ?: props->max_threads_per_core;
}
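
/* Note: the GNU `?:` extension above is shorthand for
 *
 *    props->max_tls_instance_per_core ? props->max_tls_instance_per_core
 *                                     : props->max_threads_per_core;
 *
 * i.e. fall back to the per-core thread count when the property is zero.
 */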

unsigned
panfrost_compute_max_thread_count(const struct pan_kmod_dev_props *props,
                                  unsigned work_reg_count)
{
   unsigned aligned_reg_count;

   /* 4, 8 or 16 registers per shader on Midgard; 32 or 64 registers per
    * shader on Bifrost and newer.
    */
   if (pan_arch(props->gpu_prod_id) <= 5) {
      aligned_reg_count = util_next_power_of_two(MAX2(work_reg_count, 4));
      assert(aligned_reg_count <= 16);
   } else {
      aligned_reg_count = work_reg_count <= 32 ? 32 : 64;
   }

   return MIN3(props->max_threads_per_wg, props->max_threads_per_core,
               props->num_registers_per_core / aligned_reg_count);
}
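
/* Worked example (hypothetical properties): with work_reg_count = 40 on a
 * Bifrost part, aligned_reg_count = 64. Given num_registers_per_core = 65536,
 * max_threads_per_core = 2048 and max_threads_per_wg = 1024, the result is
 *
 *    MIN3(1024, 2048, 65536 / 64) = MIN3(1024, 2048, 1024) = 1024 threads.
 */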

uint32_t
panfrost_query_compressed_formats(const struct pan_kmod_dev_props *props)
{
   return props->texture_features[0];
}

/* Check for AFBC hardware support. AFBC is introduced in v5. Implementations
 * may omit it, signaled as a nonzero value in the AFBC_FEATURES property. */

bool
panfrost_query_afbc(const struct pan_kmod_dev_props *props)
{
   unsigned reg = props->afbc_features;

   return (pan_arch(props->gpu_prod_id) >= 5) && (reg == 0);
}

/* Check for AFRC hardware support. AFRC is introduced in v10. Implementations
 * may omit it, signaled in bit 25 of the TEXTURE_FEATURES_0 property. */

bool
panfrost_query_afrc(const struct pan_kmod_dev_props *props)
{
   return (pan_arch(props->gpu_prod_id) >= 10) &&
          (props->texture_features[0] & (1 << 25));
}

/*
 * To pipeline multiple tiles, a given tile may use at most half of the tile
 * buffer. This function returns the optimal size (assuming pipelining).
 *
 * For Mali-G510 and Mali-G310, we will need extra logic to query the
 * tilebuffer size for the particular variant. The CORE_FEATURES register
 * might help.
 */
unsigned
panfrost_query_optimal_tib_size(const struct panfrost_model *model)
{
   /* Preconditions ensure the returned value is a multiple of 1 KiB, the
    * granularity of the colour buffer allocation field.
    */
   assert(model->tilebuffer_size >= 2048);
   assert(util_is_power_of_two_nonzero(model->tilebuffer_size));

   return model->tilebuffer_size / 2;
}
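
/* For example, the Mali-G76 entry in the table above has a 16384-byte tile
 * buffer, so the optimal pipelined size is 16384 / 2 = 8192 bytes, an even
 * multiple of the 1 KiB allocation granularity.
 */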

uint64_t
panfrost_clamp_to_usable_va_range(const struct pan_kmod_dev *dev, uint64_t va)
{
   struct pan_kmod_va_range user_va_range =
      pan_kmod_dev_query_user_va_range(dev);

   if (va < user_va_range.start)
      return user_va_range.start;
   else if (va > user_va_range.start + user_va_range.size)
      return user_va_range.start + user_va_range.size;

   return va;
}
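
/* Clamping sketch (hypothetical range): if the kernel reports a user VA
 * range starting at 0x1000 with size 0x10000 (so the end is 0x11000), then
 *
 *    panfrost_clamp_to_usable_va_range(dev, 0x0)     == 0x1000
 *    panfrost_clamp_to_usable_va_range(dev, 0x20000) == 0x11000
 *    panfrost_clamp_to_usable_va_range(dev, 0x8000)  == 0x8000
 */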