xref: /aosp_15_r20/external/minigbm/i915.c (revision d95af8df99a05bcb8679a54dc3ab8e5cd312b38e)
1 /*
2  * Copyright 2014 The Chromium OS Authors. All rights reserved.
3  * Use of this source code is governed by a BSD-style license that can be
4  * found in the LICENSE file.
5  */
6 
7 #ifdef DRV_I915
8 
9 #include <assert.h>
10 #include <errno.h>
11 #include <stdbool.h>
12 #include <stdio.h>
13 #include <string.h>
14 #include <sys/mman.h>
15 #include <unistd.h>
16 #include <xf86drm.h>
17 
18 #include "drv_helpers.h"
19 #include "drv_priv.h"
20 #include "external/i915_drm.h"
21 #include "util.h"
22 
23 #define I915_CACHELINE_SIZE 64
24 #define I915_CACHELINE_MASK (I915_CACHELINE_SIZE - 1)
25 
/* Formats supported for both display scanout and GPU rendering. */
static const uint32_t scanout_render_formats[] = { DRM_FORMAT_ABGR2101010, DRM_FORMAT_ABGR8888,
						   DRM_FORMAT_ARGB2101010, DRM_FORMAT_ARGB8888,
						   DRM_FORMAT_RGB565,	   DRM_FORMAT_XBGR2101010,
						   DRM_FORMAT_XBGR8888,	   DRM_FORMAT_XRGB2101010,
						   DRM_FORMAT_XRGB8888 };

/* Formats supported for rendering only (no scanout). */
static const uint32_t render_formats[] = { DRM_FORMAT_ABGR16161616F };

/* Formats supported only as texture sources (no rendering, no scanout). */
static const uint32_t texture_only_formats[] = { DRM_FORMAT_R8, DRM_FORMAT_NV12, DRM_FORMAT_P010,
						 DRM_FORMAT_YVU420, DRM_FORMAT_YVU420_ANDROID };

/*
 * Modifier preference lists, most-preferred first, selected per platform by
 * i915_get_modifier_order() and consumed by drv_pick_modifier().
 */

/* Pre-gen11 platforms. */
static const uint64_t gen_modifier_order[] = { I915_FORMAT_MOD_Y_TILED_CCS, I915_FORMAT_MOD_Y_TILED,
					       I915_FORMAT_MOD_X_TILED, DRM_FORMAT_MOD_LINEAR };

/* Gen12 (non-MTL) platforms. */
static const uint64_t gen12_modifier_order[] = { I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS,
						 I915_FORMAT_MOD_Y_TILED, I915_FORMAT_MOD_X_TILED,
						 DRM_FORMAT_MOD_LINEAR };

/* Gen11 platforms (no CCS entry in the list). */
static const uint64_t gen11_modifier_order[] = { I915_FORMAT_MOD_Y_TILED, I915_FORMAT_MOD_X_TILED,
						 DRM_FORMAT_MOD_LINEAR };

/* Meteor Lake (Xe-LPD+ display): tile-4 replaces tile-y. */
static const uint64_t xe_lpdp_modifier_order[] = { I915_FORMAT_MOD_4_TILED_MTL_RC_CCS,
						   I915_FORMAT_MOD_4_TILED, I915_FORMAT_MOD_X_TILED,
						   DRM_FORMAT_MOD_LINEAR };
50 
/* A preference-ordered list of format modifiers for one platform. */
struct modifier_support_t {
	const uint64_t *order; /* Modifiers, most preferred first. */
	uint32_t count;	       /* Number of entries in |order|. */
};

/* Per-driver private state, stored in driver::priv by i915_init(). */
struct i915_device {
	uint32_t graphics_version; /* Intel graphics generation (3..12). */
	int32_t has_llc;	   /* I915_PARAM_HAS_LLC query result. */
	int32_t has_hw_protection; /* Non-zero when protected-content BOs can be allocated. */
	struct modifier_support_t modifier; /* Platform modifier preference list. */
	int device_id;		   /* PCI device id (I915_PARAM_CHIPSET_ID). */
	bool is_xelpd;		   /* ADL-P / RPL-P platform. */
	/* TODO: cleanup is_mtl to avoid adding variables for every new platform */
	bool is_mtl;		   /* Meteor Lake platform. */
	int32_t num_fences_avail;  /* I915_PARAM_NUM_FENCES_AVAIL; 0 disables set_tiling. */
	bool has_mmap_offset;	   /* True when mmap GTT version >= 4. */
};
68 
i915_info_from_device_id(struct i915_device * i915)69 static void i915_info_from_device_id(struct i915_device *i915)
70 {
71 	const uint16_t gen3_ids[] = { 0x2582, 0x2592, 0x2772, 0x27A2, 0x27AE,
72 				      0x29C2, 0x29B2, 0x29D2, 0xA001, 0xA011 };
73 	const uint16_t gen4_ids[] = { 0x29A2, 0x2992, 0x2982, 0x2972, 0x2A02, 0x2A12, 0x2A42,
74 				      0x2E02, 0x2E12, 0x2E22, 0x2E32, 0x2E42, 0x2E92 };
75 	const uint16_t gen5_ids[] = { 0x0042, 0x0046 };
76 	const uint16_t gen6_ids[] = { 0x0102, 0x0112, 0x0122, 0x0106, 0x0116, 0x0126, 0x010A };
77 	const uint16_t gen7_ids[] = {
78 		0x0152, 0x0162, 0x0156, 0x0166, 0x015a, 0x016a, 0x0402, 0x0412, 0x0422,
79 		0x0406, 0x0416, 0x0426, 0x040A, 0x041A, 0x042A, 0x040B, 0x041B, 0x042B,
80 		0x040E, 0x041E, 0x042E, 0x0C02, 0x0C12, 0x0C22, 0x0C06, 0x0C16, 0x0C26,
81 		0x0C0A, 0x0C1A, 0x0C2A, 0x0C0B, 0x0C1B, 0x0C2B, 0x0C0E, 0x0C1E, 0x0C2E,
82 		0x0A02, 0x0A12, 0x0A22, 0x0A06, 0x0A16, 0x0A26, 0x0A0A, 0x0A1A, 0x0A2A,
83 		0x0A0B, 0x0A1B, 0x0A2B, 0x0A0E, 0x0A1E, 0x0A2E, 0x0D02, 0x0D12, 0x0D22,
84 		0x0D06, 0x0D16, 0x0D26, 0x0D0A, 0x0D1A, 0x0D2A, 0x0D0B, 0x0D1B, 0x0D2B,
85 		0x0D0E, 0x0D1E, 0x0D2E, 0x0F31, 0x0F32, 0x0F33, 0x0157, 0x0155
86 	};
87 	const uint16_t gen8_ids[] = { 0x22B0, 0x22B1, 0x22B2, 0x22B3, 0x1602, 0x1606,
88 				      0x160A, 0x160B, 0x160D, 0x160E, 0x1612, 0x1616,
89 				      0x161A, 0x161B, 0x161D, 0x161E, 0x1622, 0x1626,
90 				      0x162A, 0x162B, 0x162D, 0x162E };
91 	const uint16_t gen9_ids[] = {
92 		0x1902, 0x1906, 0x190A, 0x190B, 0x190E, 0x1912, 0x1913, 0x1915, 0x1916, 0x1917,
93 		0x191A, 0x191B, 0x191D, 0x191E, 0x1921, 0x1923, 0x1926, 0x1927, 0x192A, 0x192B,
94 		0x192D, 0x1932, 0x193A, 0x193B, 0x193D, 0x0A84, 0x1A84, 0x1A85, 0x5A84, 0x5A85,
95 		0x3184, 0x3185, 0x5902, 0x5906, 0x590A, 0x5908, 0x590B, 0x590E, 0x5913, 0x5915,
96 		0x5917, 0x5912, 0x5916, 0x591A, 0x591B, 0x591D, 0x591E, 0x5921, 0x5923, 0x5926,
97 		0x5927, 0x593B, 0x591C, 0x87C0, 0x87CA, 0x3E90, 0x3E93, 0x3E99, 0x3E9C, 0x3E91,
98 		0x3E92, 0x3E96, 0x3E98, 0x3E9A, 0x3E9B, 0x3E94, 0x3EA9, 0x3EA5, 0x3EA6, 0x3EA7,
99 		0x3EA8, 0x3EA1, 0x3EA4, 0x3EA0, 0x3EA3, 0x3EA2, 0x9B21, 0x9BA0, 0x9BA2, 0x9BA4,
100 		0x9BA5, 0x9BA8, 0x9BAA, 0x9BAB, 0x9BAC, 0x9B41, 0x9BC0, 0x9BC2, 0x9BC4, 0x9BC5,
101 		0x9BC6, 0x9BC8, 0x9BCA, 0x9BCB, 0x9BCC, 0x9BE6, 0x9BF6
102 	};
103 	const uint16_t gen11_ids[] = { 0x8A50, 0x8A51, 0x8A52, 0x8A53, 0x8A54, 0x8A56, 0x8A57,
104 				       0x8A58, 0x8A59, 0x8A5A, 0x8A5B, 0x8A5C, 0x8A5D, 0x8A71,
105 				       0x4500, 0x4541, 0x4551, 0x4555, 0x4557, 0x4571, 0x4E51,
106 				       0x4E55, 0x4E57, 0x4E61, 0x4E71 };
107 	const uint16_t gen12_ids[] = {
108 		0x4c8a, 0x4c8b, 0x4c8c, 0x4c90, 0x4c9a, 0x4680, 0x4681, 0x4682, 0x4683, 0x4688,
109 		0x4689, 0x4690, 0x4691, 0x4692, 0x4693, 0x4698, 0x4699, 0x4626, 0x4628, 0x462a,
110 		0x46a0, 0x46a1, 0x46a2, 0x46a3, 0x46a6, 0x46a8, 0x46aa, 0x46b0, 0x46b1, 0x46b2,
111 		0x46b3, 0x46c0, 0x46c1, 0x46c2, 0x46c3, 0x9A40, 0x9A49, 0x9A59, 0x9A60, 0x9A68,
112 		0x9A70, 0x9A78, 0x9AC0, 0x9AC9, 0x9AD9, 0x9AF8, 0x4905, 0x4906, 0x4907, 0x4908
113 	};
114 	const uint16_t adlp_ids[] = { 0x46A0, 0x46A1, 0x46A2, 0x46A3, 0x46A6, 0x46A8,
115 				      0x46AA, 0x462A, 0x4626, 0x4628, 0x46B0, 0x46B1,
116 				      0x46B2, 0x46B3, 0x46C0, 0x46C1, 0x46C2, 0x46C3,
117 				      0x46D0, 0x46D1, 0x46D2, 0x46D3, 0x46D4 };
118 
119 	const uint16_t rplp_ids[] = { 0xA720, 0xA721, 0xA7A0, 0xA7A1, 0xA7A8,
120 				      0xA7A9, 0xA7AA, 0xA7AB, 0xA7AC, 0xA7AD };
121 
122 	const uint16_t mtl_ids[] = { 0x7D40, 0x7D60, 0x7D45, 0x7D55, 0x7DD5 };
123 
124 	unsigned i;
125 	i915->graphics_version = 4;
126 	i915->is_xelpd = false;
127 	i915->is_mtl = false;
128 
129 	for (i = 0; i < ARRAY_SIZE(gen3_ids); i++)
130 		if (gen3_ids[i] == i915->device_id)
131 			i915->graphics_version = 3;
132 
133 	/* Gen 4 */
134 	for (i = 0; i < ARRAY_SIZE(gen4_ids); i++)
135 		if (gen4_ids[i] == i915->device_id)
136 			i915->graphics_version = 4;
137 
138 	/* Gen 5 */
139 	for (i = 0; i < ARRAY_SIZE(gen5_ids); i++)
140 		if (gen5_ids[i] == i915->device_id)
141 			i915->graphics_version = 5;
142 
143 	/* Gen 6 */
144 	for (i = 0; i < ARRAY_SIZE(gen6_ids); i++)
145 		if (gen6_ids[i] == i915->device_id)
146 			i915->graphics_version = 6;
147 
148 	/* Gen 7 */
149 	for (i = 0; i < ARRAY_SIZE(gen7_ids); i++)
150 		if (gen7_ids[i] == i915->device_id)
151 			i915->graphics_version = 7;
152 
153 	/* Gen 8 */
154 	for (i = 0; i < ARRAY_SIZE(gen8_ids); i++)
155 		if (gen8_ids[i] == i915->device_id)
156 			i915->graphics_version = 8;
157 
158 	/* Gen 9 */
159 	for (i = 0; i < ARRAY_SIZE(gen9_ids); i++)
160 		if (gen9_ids[i] == i915->device_id)
161 			i915->graphics_version = 9;
162 
163 	/* Gen 11 */
164 	for (i = 0; i < ARRAY_SIZE(gen11_ids); i++)
165 		if (gen11_ids[i] == i915->device_id)
166 			i915->graphics_version = 11;
167 
168 	/* Gen 12 */
169 	for (i = 0; i < ARRAY_SIZE(gen12_ids); i++)
170 		if (gen12_ids[i] == i915->device_id)
171 			i915->graphics_version = 12;
172 
173 	for (i = 0; i < ARRAY_SIZE(adlp_ids); i++)
174 		if (adlp_ids[i] == i915->device_id) {
175 			i915->is_xelpd = true;
176 			i915->graphics_version = 12;
177 		}
178 
179 	for (i = 0; i < ARRAY_SIZE(rplp_ids); i++)
180 		if (rplp_ids[i] == i915->device_id) {
181 			i915->is_xelpd = true;
182 			i915->graphics_version = 12;
183 		}
184 
185 	for (i = 0; i < ARRAY_SIZE(mtl_ids); i++)
186 		if (mtl_ids[i] == i915->device_id) {
187 			i915->graphics_version = 12;
188 			i915->is_mtl = true;
189 		}
190 }
191 
i915_get_modifier_order(struct i915_device * i915)192 static void i915_get_modifier_order(struct i915_device *i915)
193 {
194 	if (i915->is_mtl) {
195 		i915->modifier.order = xe_lpdp_modifier_order;
196 		i915->modifier.count = ARRAY_SIZE(xe_lpdp_modifier_order);
197 	} else if (i915->graphics_version == 12) {
198 		i915->modifier.order = gen12_modifier_order;
199 		i915->modifier.count = ARRAY_SIZE(gen12_modifier_order);
200 	} else if (i915->graphics_version == 11) {
201 		i915->modifier.order = gen11_modifier_order;
202 		i915->modifier.count = ARRAY_SIZE(gen11_modifier_order);
203 	} else {
204 		i915->modifier.order = gen_modifier_order;
205 		i915->modifier.count = ARRAY_SIZE(gen_modifier_order);
206 	}
207 }
208 
/* Returns |current_flags| with every bit present in |mask| cleared. */
static uint64_t unset_flags(uint64_t current_flags, uint64_t mask)
{
	return current_flags & ~mask;
}
214 
/*
 * Registers every format/modifier/usage combination supported by this
 * platform.  Linear combinations are registered first, then X-tiled, then
 * either tile-4 (MTL) or tile-Y combinations with CPU/linear usages masked
 * out.  Always returns 0.
 */
static int i915_add_combinations(struct driver *drv)
{
	struct i915_device *i915 = drv->priv;

	const uint64_t scanout_and_render = BO_USE_RENDER_MASK | BO_USE_SCANOUT;
	const uint64_t render = BO_USE_RENDER_MASK;
	const uint64_t texture_only = BO_USE_TEXTURE_MASK;
	// HW protected buffers also need to be scanned out.
	const uint64_t hw_protected =
	    i915->has_hw_protection ? (BO_USE_PROTECTED | BO_USE_SCANOUT) : 0;

	/* Usages that require a linear layout or CPU access; stripped from the
	 * tiled combinations added further below. */
	const uint64_t linear_mask = BO_USE_RENDERSCRIPT | BO_USE_LINEAR | BO_USE_SW_READ_OFTEN |
				     BO_USE_SW_WRITE_OFTEN | BO_USE_SW_READ_RARELY |
				     BO_USE_SW_WRITE_RARELY;

	struct format_metadata metadata_linear = { .tiling = I915_TILING_NONE,
						   .priority = 1,
						   .modifier = DRM_FORMAT_MOD_LINEAR };

	drv_add_combinations(drv, scanout_render_formats, ARRAY_SIZE(scanout_render_formats),
			     &metadata_linear, scanout_and_render);

	drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats), &metadata_linear,
			     render);

	drv_add_combinations(drv, texture_only_formats, ARRAY_SIZE(texture_only_formats),
			     &metadata_linear, texture_only);

	drv_modify_linear_combinations(drv);

	/* NV12 format for camera, display, decoding and encoding. */
	/* IPU3 camera ISP supports only NV12 output. */
	drv_modify_combination(drv, DRM_FORMAT_NV12, &metadata_linear,
			       BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_SCANOUT |
				   BO_USE_HW_VIDEO_DECODER | BO_USE_HW_VIDEO_ENCODER |
				   hw_protected);

	/* P010 linear can be used for scanout too. */
	drv_modify_combination(drv, DRM_FORMAT_P010, &metadata_linear, BO_USE_SCANOUT);

	/*
	 * Android also frequently requests YV12 formats for some camera implementations
	 * (including the external provider implementation).
	 */
	drv_modify_combination(drv, DRM_FORMAT_YVU420_ANDROID, &metadata_linear,
			       BO_USE_CAMERA_WRITE);

	/* Android CTS tests require this. */
	drv_add_combination(drv, DRM_FORMAT_BGR888, &metadata_linear, BO_USE_SW_MASK);

	/*
	 * R8 format is used for Android's HAL_PIXEL_FORMAT_BLOB and is used for JPEG snapshots
	 * from camera and input/output from hardware decoder/encoder.
	 */
	drv_modify_combination(drv, DRM_FORMAT_R8, &metadata_linear,
			       BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_HW_VIDEO_DECODER |
				   BO_USE_HW_VIDEO_ENCODER | BO_USE_GPU_DATA_BUFFER |
				   BO_USE_SENSOR_DIRECT_DATA);

	/* Tiled combinations must not carry linear/CPU-access usages. */
	const uint64_t render_not_linear = unset_flags(render, linear_mask);
	const uint64_t scanout_and_render_not_linear = render_not_linear | BO_USE_SCANOUT;

	struct format_metadata metadata_x_tiled = { .tiling = I915_TILING_X,
						    .priority = 2,
						    .modifier = I915_FORMAT_MOD_X_TILED };

	drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats), &metadata_x_tiled,
			     render_not_linear);
	drv_add_combinations(drv, scanout_render_formats, ARRAY_SIZE(scanout_render_formats),
			     &metadata_x_tiled, scanout_and_render_not_linear);

	if (i915->is_mtl) {
		/* MTL uses tile-4 in place of tile-Y. */
		struct format_metadata metadata_4_tiled = { .tiling = I915_TILING_4,
							    .priority = 3,
							    .modifier = I915_FORMAT_MOD_4_TILED };
/* Support tile4 NV12 and P010 for libva */
#ifdef I915_SCANOUT_4_TILED
		const uint64_t nv12_usage =
		    BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER | BO_USE_SCANOUT | hw_protected;
		const uint64_t p010_usage =
		    BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER | hw_protected | BO_USE_SCANOUT;
#else
		const uint64_t nv12_usage = BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER;
		const uint64_t p010_usage = nv12_usage;
#endif
		drv_add_combination(drv, DRM_FORMAT_NV12, &metadata_4_tiled, nv12_usage);
		drv_add_combination(drv, DRM_FORMAT_P010, &metadata_4_tiled, p010_usage);
		drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats),
				     &metadata_4_tiled, render_not_linear);
		drv_add_combinations(drv, scanout_render_formats,
				     ARRAY_SIZE(scanout_render_formats), &metadata_4_tiled,
				     scanout_and_render_not_linear);
	} else {
		struct format_metadata metadata_y_tiled = { .tiling = I915_TILING_Y,
							    .priority = 3,
							    .modifier = I915_FORMAT_MOD_Y_TILED };

/* Support y-tiled NV12 and P010 for libva */
#ifdef I915_SCANOUT_Y_TILED
		const uint64_t nv12_usage =
		    BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER | BO_USE_SCANOUT | hw_protected;
		/* Y-tiled P010 gets scanout only on gen11+. */
		const uint64_t p010_usage = BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER |
					    hw_protected |
					    (i915->graphics_version >= 11 ? BO_USE_SCANOUT : 0);
#else
		const uint64_t nv12_usage = BO_USE_TEXTURE | BO_USE_HW_VIDEO_DECODER;
		const uint64_t p010_usage = nv12_usage;
#endif
		drv_add_combinations(drv, render_formats, ARRAY_SIZE(render_formats),
				     &metadata_y_tiled, render_not_linear);
		/* Y-tiled scanout isn't available on old platforms so we add
		 * |scanout_render_formats| without that USE flag.
		 */
		drv_add_combinations(drv, scanout_render_formats,
				     ARRAY_SIZE(scanout_render_formats), &metadata_y_tiled,
				     render_not_linear);
		drv_add_combination(drv, DRM_FORMAT_NV12, &metadata_y_tiled, nv12_usage);
		drv_add_combination(drv, DRM_FORMAT_P010, &metadata_y_tiled, p010_usage);
	}
	return 0;
}
336 
/*
 * Applies the i915 stride/height alignment rules for |tiling| to *stride and
 * *aligned_height (both updated in place).  Returns 0 on success, or -EINVAL
 * when the resulting stride exceeds what gen <= 3 hardware supports.
 */
static int i915_align_dimensions(struct bo *bo, uint32_t format, uint32_t tiling, uint32_t *stride,
				 uint32_t *aligned_height)
{
	struct i915_device *i915 = bo->drv->priv;
	uint32_t horizontal_alignment;
	uint32_t vertical_alignment;

	switch (tiling) {
	default:
	case I915_TILING_NONE:
		/*
		 * The Intel GPU doesn't need any alignment in linear mode,
		 * but libva requires the allocation stride to be aligned to
		 * 16 bytes and height to 4 rows. Further, we round up the
		 * horizontal alignment so that row start on a cache line (64
		 * bytes).
		 */
#ifdef LINEAR_ALIGN_256
		/*
		 * If we want to import these buffers to amdgpu they need to
		 * match their LINEAR_ALIGNED requirement of 256 byte alignment.
		 */
		horizontal_alignment = 256;
#else
		horizontal_alignment = 64;
#endif

		/*
		 * For hardware video encoding buffers, we want to align to the size of a
		 * macroblock, because otherwise we will end up encoding uninitialized data.
		 * This can result in substantial quality degradations, especially on lower
		 * resolution videos, because this uninitialized data may be high entropy.
		 * For R8 and height=1, we assume the surface will be used as a linear buffer blob
		 * (such as VkBuffer). The hardware allows vertical_alignment=1 only for non-tiled
		 * 1D surfaces, which covers the VkBuffer case. However, if the app uses the surface
		 * as a 2D image with height=1, then this code is buggy. For 2D images, the hardware
		 * requires a vertical_alignment >= 4, and underallocating with vertical_alignment=1
		 * will cause the GPU to read out-of-bounds.
		 *
		 * TODO: add a new DRM_FORMAT_BLOB format for this case, or further tighten up the
		 * constraints with GPU_DATA_BUFFER usage when the guest has migrated to use
		 * virtgpu_cross_domain backend which passes that flag through.
		 */
		if (bo->meta.use_flags & BO_USE_HW_VIDEO_ENCODER) {
			vertical_alignment = 8;
		} else if (format == DRM_FORMAT_R8 && *aligned_height == 1) {
			vertical_alignment = 1;
		} else {
			vertical_alignment = 4;
		}

		break;

	case I915_TILING_X:
		horizontal_alignment = 512;
		vertical_alignment = 8;
		break;

	case I915_TILING_Y:
	case I915_TILING_4:
		if (i915->graphics_version == 3) {
			horizontal_alignment = 512;
			vertical_alignment = 8;
		} else {
			horizontal_alignment = 128;
			vertical_alignment = 32;
		}
		break;
	}

	*aligned_height = ALIGN(*aligned_height, vertical_alignment);
	if (i915->graphics_version > 3) {
		*stride = ALIGN(*stride, horizontal_alignment);
	} else {
		/* Gen <= 3: double the alignment until it covers the requested
		 * stride, then use it directly — presumably because the old
		 * fencing hardware needs power-of-two strides (TODO confirm). */
		while (*stride > horizontal_alignment)
			horizontal_alignment <<= 1;

		*stride = horizontal_alignment;
	}

	/* Gen <= 3 cannot handle strides wider than 8K. */
	if (i915->graphics_version <= 3 && *stride > 8192)
		return -EINVAL;

	return 0;
}
422 
/*
 * Flushes CPU cachelines covering [start, start + size).  |start| is rounded
 * down to a cacheline boundary; the mfence before and after orders the
 * flushes with surrounding loads/stores.  x86-only (ia32 builtins).
 */
static void i915_clflush(void *start, size_t size)
{
	void *p = (void *)(((uintptr_t)start) & ~I915_CACHELINE_MASK);
	void *end = (void *)((uintptr_t)start + size);

	__builtin_ia32_mfence();
	while (p < end) {
#if defined(__CLFLUSHOPT__)
		/* clflushopt is weakly ordered and faster; the trailing mfence
		 * still guarantees completion. */
		__builtin_ia32_clflushopt(p);
#else
		__builtin_ia32_clflush(p);
#endif
		p = (void *)((uintptr_t)p + I915_CACHELINE_SIZE);
	}
	__builtin_ia32_mfence();
}
439 
i915_init(struct driver * drv)440 static int i915_init(struct driver *drv)
441 {
442 	int ret, val;
443 	struct i915_device *i915;
444 	drm_i915_getparam_t get_param = { 0 };
445 
446 	i915 = calloc(1, sizeof(*i915));
447 	if (!i915)
448 		return -ENOMEM;
449 
450 	get_param.param = I915_PARAM_CHIPSET_ID;
451 	get_param.value = &(i915->device_id);
452 	ret = drmIoctl(drv->fd, DRM_IOCTL_I915_GETPARAM, &get_param);
453 	if (ret) {
454 		drv_loge("Failed to get I915_PARAM_CHIPSET_ID\n");
455 		free(i915);
456 		return -EINVAL;
457 	}
458 	/* must call before i915->graphics_version is used anywhere else */
459 	i915_info_from_device_id(i915);
460 
461 	i915_get_modifier_order(i915);
462 
463 	memset(&get_param, 0, sizeof(get_param));
464 	get_param.param = I915_PARAM_HAS_LLC;
465 	get_param.value = &i915->has_llc;
466 	ret = drmIoctl(drv->fd, DRM_IOCTL_I915_GETPARAM, &get_param);
467 	if (ret) {
468 		drv_loge("Failed to get I915_PARAM_HAS_LLC\n");
469 		free(i915);
470 		return -EINVAL;
471 	}
472 
473 	memset(&get_param, 0, sizeof(get_param));
474 	get_param.param = I915_PARAM_NUM_FENCES_AVAIL;
475 	get_param.value = &i915->num_fences_avail;
476 	ret = drmIoctl(drv->fd, DRM_IOCTL_I915_GETPARAM, &get_param);
477 	if (ret) {
478 		drv_loge("Failed to get I915_PARAM_NUM_FENCES_AVAIL\n");
479 		free(i915);
480 		return -EINVAL;
481 	}
482 
483 	memset(&get_param, 0, sizeof(get_param));
484 	get_param.param = I915_PARAM_MMAP_GTT_VERSION;
485 	get_param.value = &val;
486 
487 	ret = drmIoctl(drv->fd, DRM_IOCTL_I915_GETPARAM, &get_param);
488 	if (ret) {
489 		drv_loge("Failed to get I915_PARAM_MMAP_GTT_VERSION\n");
490 		free(i915);
491 		return -EINVAL;
492 	}
493 	i915->has_mmap_offset = (val >= 4);
494 
495 	if (i915->graphics_version >= 12)
496 		i915->has_hw_protection = 1;
497 
498 	drv->priv = i915;
499 	return i915_add_combinations(drv);
500 }
501 
502 /*
503  * Returns true if the height of a buffer of the given format should be aligned
504  * to the largest coded unit (LCU) assuming that it will be used for video. This
505  * is based on gmmlib's GmmIsYUVFormatLCUAligned().
506  */
i915_format_needs_LCU_alignment(uint32_t format,size_t plane,const struct i915_device * i915)507 static bool i915_format_needs_LCU_alignment(uint32_t format, size_t plane,
508 					    const struct i915_device *i915)
509 {
510 	switch (format) {
511 	case DRM_FORMAT_NV12:
512 	case DRM_FORMAT_P010:
513 	case DRM_FORMAT_P016:
514 		return (i915->graphics_version == 11 || i915->graphics_version == 12) && plane == 1;
515 	}
516 	return false;
517 }
518 
/*
 * Computes the per-plane strides, sizes and offsets for |format| using the
 * generic plane helpers plus i915 alignment rules, and sets the page-aligned
 * total size.  Returns 0 on success or a negative errno from
 * i915_align_dimensions().
 */
static int i915_bo_from_format(struct bo *bo, uint32_t width, uint32_t height, uint32_t format)
{
	uint32_t offset;
	size_t plane;
	int ret, pagesize;
	struct i915_device *i915 = bo->drv->priv;

	offset = 0;
	pagesize = getpagesize();

	for (plane = 0; plane < drv_num_planes_from_format(format); plane++) {
		uint32_t stride = drv_stride_from_format(format, width, plane);
		uint32_t plane_height = drv_height_from_format(format, height, plane);

		/* Tiled planes must start on a page boundary. */
		if (bo->meta.tiling != I915_TILING_NONE)
			assert(IS_ALIGNED(offset, pagesize));

		ret = i915_align_dimensions(bo, format, bo->meta.tiling, &stride, &plane_height);
		if (ret)
			return ret;

		if (i915_format_needs_LCU_alignment(format, plane, i915)) {
			/*
			 * Align the height of the V plane for certain formats to the
			 * largest coded unit (assuming that this BO may be used for video)
			 * to be consistent with gmmlib.
			 */
			plane_height = ALIGN(plane_height, 64);
		}

		bo->meta.strides[plane] = stride;
		bo->meta.sizes[plane] = stride * plane_height;
		bo->meta.offsets[plane] = offset;
		offset += bo->meta.sizes[plane];
	}

	/* The overall allocation is rounded up to whole pages. */
	bo->meta.total_size = ALIGN(offset, pagesize);

	return 0;
}
559 
i915_num_planes_from_modifier(struct driver * drv,uint32_t format,uint64_t modifier)560 static size_t i915_num_planes_from_modifier(struct driver *drv, uint32_t format, uint64_t modifier)
561 {
562 	size_t num_planes = drv_num_planes_from_format(format);
563 	if (modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
564 	    modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS ||
565 	    modifier == I915_FORMAT_MOD_4_TILED_MTL_RC_CCS) {
566 		assert(num_planes == 1);
567 		return 2;
568 	}
569 	return num_planes;
570 }
571 
/*
 * gbm_fls(x): "find last set" — the 1-based index of the most significant set
 * bit of |x|, or 0 when x == 0.  __builtin_choose_expr selects the 64-bit clz
 * builtin when the argument is 8 bytes wide so 64-bit values are handled.
 */
#define gbm_fls(x)                                                                                 \
	((x) ? __builtin_choose_expr(sizeof(x) == 8, 64 - __builtin_clzll(x),                      \
				     32 - __builtin_clz(x))                                        \
	     : 0)

/* Rounds |x| up to the next power of two; 0 maps to 0 and powers of two map
 * to themselves (hence the (x) - 1). */
#define roundup_power_of_two(x) ((x) != 0 ? 1ULL << gbm_fls((x) - 1) : 0)
578 
/*
 * Computes the tiling mode, format modifier and per-plane layout (strides,
 * offsets, sizes, total_size) for a BO prior to allocation.  The modifier is
 * negotiated from the caller-supplied |modifiers| list when present, otherwise
 * taken from the driver combination matching |format| + |use_flags|.  Returns
 * 0 on success or -EINVAL when no combination matches.
 */
static int i915_bo_compute_metadata(struct bo *bo, uint32_t width, uint32_t height, uint32_t format,
				    uint64_t use_flags, const uint64_t *modifiers, uint32_t count)
{
	uint64_t modifier;
	struct i915_device *i915 = bo->drv->priv;
	bool huge_bo = (i915->graphics_version < 11) && (width > 4096);

	if (modifiers) {
		modifier =
		    drv_pick_modifier(modifiers, count, i915->modifier.order, i915->modifier.count);
	} else {
		struct combination *combo = drv_get_combination(bo->drv, format, use_flags);
		if (!combo)
			return -EINVAL;
		modifier = combo->metadata.modifier;
	}

	/*
	 * i915 only supports linear/x-tiled above 4096 wide on Gen9/Gen10 GPU.
	 * VAAPI decode in NV12 Y tiled format so skip modifier change for NV12/P010 huge bo.
	 */
	if (huge_bo && format != DRM_FORMAT_NV12 && format != DRM_FORMAT_P010 &&
	    modifier != I915_FORMAT_MOD_X_TILED && modifier != DRM_FORMAT_MOD_LINEAR) {
		uint32_t i;
		/* Prefer X-tiled if the caller offered it; otherwise fall back
		 * to linear. */
		for (i = 0; modifiers && i < count; i++) {
			if (modifiers[i] == I915_FORMAT_MOD_X_TILED)
				break;
		}
		if (i == count)
			modifier = DRM_FORMAT_MOD_LINEAR;
		else
			modifier = I915_FORMAT_MOD_X_TILED;
	}

	/*
	 * Skip I915_FORMAT_MOD_Y_TILED_CCS modifier if compression is disabled
	 * Pick y tiled modifier if it has been passed in, otherwise use linear
	 */
	if (!bo->drv->compression && modifier == I915_FORMAT_MOD_Y_TILED_CCS) {
		uint32_t i;
		for (i = 0; modifiers && i < count; i++) {
			if (modifiers[i] == I915_FORMAT_MOD_Y_TILED)
				break;
		}
		if (i == count)
			modifier = DRM_FORMAT_MOD_LINEAR;
		else
			modifier = I915_FORMAT_MOD_Y_TILED;
	}

	/* Prevent gen 8 and earlier from trying to use a tiling modifier
	 * (applied to ARGB8888 only). */
	if (i915->graphics_version <= 8 && format == DRM_FORMAT_ARGB8888) {
		modifier = DRM_FORMAT_MOD_LINEAR;
	}

	/* Map the chosen modifier onto the kernel tiling mode. */
	switch (modifier) {
	case DRM_FORMAT_MOD_LINEAR:
		bo->meta.tiling = I915_TILING_NONE;
		break;
	case I915_FORMAT_MOD_X_TILED:
		bo->meta.tiling = I915_TILING_X;
		break;
	case I915_FORMAT_MOD_Y_TILED:
	case I915_FORMAT_MOD_Y_TILED_CCS:
	/* For now support only I915_TILING_Y as this works with all
	 * IPs(render/media/display)
	 */
	case I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS:
		bo->meta.tiling = I915_TILING_Y;
		break;
	case I915_FORMAT_MOD_4_TILED:
	case I915_FORMAT_MOD_4_TILED_MTL_RC_CCS:
		bo->meta.tiling = I915_TILING_4;
		break;
	}

	bo->meta.format_modifier = modifier;

	if (format == DRM_FORMAT_YVU420_ANDROID) {
		/*
		 * We only need to be able to use this as a linear texture,
		 * which doesn't put any HW restrictions on how we lay it
		 * out. The Android format does require the stride to be a
		 * multiple of 16 and expects the Cr and Cb stride to be
		 * ALIGN(Y_stride / 2, 16), which we can make happen by
		 * aligning to 32 bytes here.
		 */
		uint32_t stride = ALIGN(width, 32);
		return drv_bo_from_format(bo, stride, 1, height, format);
	} else if (modifier == I915_FORMAT_MOD_Y_TILED_CCS) {
		/*
		 * For compressed surfaces, we need a color control surface
		 * (CCS). Color compression is only supported for Y tiled
		 * surfaces, and for each 32x16 tiles in the main surface we
		 * need a tile in the control surface.  Y tiles are 128 bytes
		 * wide and 32 lines tall and we use that to first compute the
		 * width and height in tiles of the main surface. stride and
		 * height are already multiples of 128 and 32, respectively:
		 */
		uint32_t stride = drv_stride_from_format(format, width, 0);
		uint32_t width_in_tiles = DIV_ROUND_UP(stride, 128);
		uint32_t height_in_tiles = DIV_ROUND_UP(height, 32);
		uint32_t size = width_in_tiles * height_in_tiles * 4096;
		uint32_t offset = 0;

		/* Plane 0: the main Y-tiled color surface. */
		bo->meta.strides[0] = width_in_tiles * 128;
		bo->meta.sizes[0] = size;
		bo->meta.offsets[0] = offset;
		offset += size;

		/*
		 * Now, compute the width and height in tiles of the control
		 * surface by dividing and rounding up.
		 */
		uint32_t ccs_width_in_tiles = DIV_ROUND_UP(width_in_tiles, 32);
		uint32_t ccs_height_in_tiles = DIV_ROUND_UP(height_in_tiles, 16);
		uint32_t ccs_size = ccs_width_in_tiles * ccs_height_in_tiles * 4096;

		/*
		 * With stride and height aligned to y tiles, offset is
		 * already a multiple of 4096, which is the required alignment
		 * of the CCS.
		 */
		bo->meta.strides[1] = ccs_width_in_tiles * 128;
		bo->meta.sizes[1] = ccs_size;
		bo->meta.offsets[1] = offset;
		offset += ccs_size;

		bo->meta.num_planes = i915_num_planes_from_modifier(bo->drv, format, modifier);
		bo->meta.total_size = offset;
	} else if (modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS) {
		assert(drv_num_planes_from_format(format) > 0);
		uint32_t offset = 0;
		size_t plane = 0;
		size_t a_plane = 0;
		/*
		 * considering only 128 byte compression and one cache line of
		 * aux buffer(64B) contains compression status of 4-Y tiles.
		 * Which is 4 * (128B * 32L).
		 * line stride(bytes) is 4 * 128B
		 * and tile stride(lines) is 32L
		 */
		for (plane = 0; plane < drv_num_planes_from_format(format); plane++) {
			uint32_t stride = ALIGN(drv_stride_from_format(format, width, plane), 512);

			const uint32_t plane_height = drv_height_from_format(format, height, plane);
			uint32_t aligned_height = ALIGN(plane_height, 32);

			if (i915->is_xelpd && (stride > 1)) {
				/* XeLPD: round the stride up to the next power
				 * of two and use a taller (128-line) height
				 * alignment. */
				stride = 1 << (32 - __builtin_clz(stride - 1));
				aligned_height = ALIGN(plane_height, 128);
			}

			bo->meta.strides[plane] = stride;
			/* size calculation & alignment are 64KB aligned
			 * size as per spec
			 */
			bo->meta.sizes[plane] = ALIGN(stride * aligned_height, 512 * 128);
			bo->meta.offsets[plane] = offset;
			/* next buffer offset */
			offset += bo->meta.sizes[plane];
		}

		/* Aux buffer is linear and page aligned. It is placed after
		 * other planes and aligned to main buffer stride.
		 */
		for (a_plane = 0; a_plane < plane; a_plane++) {
			/* Every 64 bytes in the aux plane contain compression information for a
			 * sub-row of 4 Y tiles of the corresponding main plane, so the pitch in
			 * bytes of the aux plane should be the pitch of the main plane in units of
			 * 4 tiles multiplied by 64 (or equivalently, the pitch of the main plane in
			 * bytes divided by 8).
			 */
			bo->meta.strides[plane + a_plane] = bo->meta.strides[a_plane] / 8;
			/* Aligned to page size */
			bo->meta.sizes[plane + a_plane] =
			    ALIGN(bo->meta.sizes[a_plane] / 256, 4 * 1024);
			bo->meta.offsets[plane + a_plane] = offset;

			/* next buffer offset */
			offset += bo->meta.sizes[plane + a_plane];
		}
		/* Total number of planes & sizes */
		bo->meta.num_planes = plane + a_plane;
		bo->meta.total_size = offset;
	} else if (modifier == I915_FORMAT_MOD_4_TILED_MTL_RC_CCS) {
		assert(drv_num_planes_from_format(format) > 0);
		uint32_t offset = 0, stride = 0;
		size_t plane = 0;
		size_t a_plane = 0;
		for (plane = 0; plane < drv_num_planes_from_format(format); plane++) {
			uint32_t alignment = 0, val, tmpoffset = 0;
			/*
			 * tile_align = 4 (for width) for CCS
			 */
			stride = ALIGN(drv_stride_from_format(format, width, plane), 512);
			/* NOTE(review): |height| is overwritten each iteration,
			 * so subsequent planes derive their height from the
			 * previous plane's aligned height — confirm this
			 * matches gmmlib's intent. */
			height = ALIGN(drv_height_from_format(format, height, plane), 32);
			bo->meta.strides[plane] = stride;

			/* MTL needs 1MB Alignment */
			bo->meta.sizes[plane] = ALIGN(stride * height, 0x100000);
			if (plane == 1 &&
			    (format == DRM_FORMAT_NV12 || format == DRM_FORMAT_P010)) {
				/* Push the chroma plane to a 1MB boundary, or
				 * further out to stride * next-pow2(stride) if
				 * that is larger. */
				alignment = 1 << 20;
				offset += alignment - (offset % alignment);
				tmpoffset = offset;
				val = roundup_power_of_two(stride);
				if ((stride * val) > tmpoffset)
					offset = stride * val;
			}

			bo->meta.offsets[plane] = offset;
			offset += bo->meta.sizes[plane];
		}

		/* Aux buffer is linear and page aligned. It is placed after
		 * other planes and aligned to main buffer stride.
		 */
		for (a_plane = 0; a_plane < plane; a_plane++) {
			/* Aux pitch is the main plane pitch divided by 8. */
			stride = bo->meta.strides[a_plane] / 8;
			bo->meta.strides[a_plane + plane] = stride;

			/* Aligned to page size */
			bo->meta.sizes[a_plane + plane] =
			    ALIGN(bo->meta.sizes[a_plane] / 256, getpagesize());
			bo->meta.offsets[a_plane + plane] = offset;
			/* next buffer offset */
			offset += bo->meta.sizes[plane + a_plane];
		}

		bo->meta.num_planes = plane + a_plane;
		bo->meta.total_size = offset;
	} else {
		/* All remaining modifiers use the generic per-plane layout. */
		return i915_bo_from_format(bo, width, height, format);
	}
	return 0;
}
816 
i915_bo_create_from_metadata(struct bo * bo)817 static int i915_bo_create_from_metadata(struct bo *bo)
818 {
819 	int ret;
820 	uint32_t gem_handle;
821 	struct drm_i915_gem_set_tiling gem_set_tiling = { 0 };
822 	struct i915_device *i915 = bo->drv->priv;
823 
824 	if (i915->has_hw_protection && (bo->meta.use_flags & BO_USE_PROTECTED)) {
825 		struct drm_i915_gem_create_ext_protected_content protected_content = {
826 			.base = { .name = I915_GEM_CREATE_EXT_PROTECTED_CONTENT },
827 			.flags = 0,
828 		};
829 
830 		struct drm_i915_gem_create_ext create_ext = {
831 			.size = bo->meta.total_size,
832 			.extensions = (uintptr_t)&protected_content,
833 		};
834 
835 		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
836 		if (ret) {
837 			drv_loge("DRM_IOCTL_I915_GEM_CREATE_EXT failed (size=%llu) (ret=%d) \n",
838 				 create_ext.size, ret);
839 			return -errno;
840 		}
841 
842 		gem_handle = create_ext.handle;
843 	} else {
844 		struct drm_i915_gem_create gem_create = { 0 };
845 		gem_create.size = bo->meta.total_size;
846 		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create);
847 		if (ret) {
848 			drv_loge("DRM_IOCTL_I915_GEM_CREATE failed (size=%llu)\n", gem_create.size);
849 			return -errno;
850 		}
851 
852 		gem_handle = gem_create.handle;
853 	}
854 
855 	bo->handle.u32 = gem_handle;
856 
857 	/* Set/Get tiling ioctl not supported  based on fence availability
858 	   Refer : "https://patchwork.freedesktop.org/patch/325343/"
859 	 */
860 	if (i915->num_fences_avail) {
861 		gem_set_tiling.handle = bo->handle.u32;
862 		gem_set_tiling.tiling_mode = bo->meta.tiling;
863 		gem_set_tiling.stride = bo->meta.strides[0];
864 
865 		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_SET_TILING, &gem_set_tiling);
866 		if (ret) {
867 			struct drm_gem_close gem_close = { 0 };
868 			gem_close.handle = bo->handle.u32;
869 			drmIoctl(bo->drv->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
870 
871 			drv_loge("DRM_IOCTL_I915_GEM_SET_TILING failed with %d\n", errno);
872 			return -errno;
873 		}
874 	}
875 
876 	bo->meta.cached = (i915->has_llc || i915->is_mtl) && !(bo->meta.use_flags & BO_USE_SCANOUT);
877 
878 	return 0;
879 }
880 
i915_close(struct driver * drv)881 static void i915_close(struct driver *drv)
882 {
883 	free(drv->priv);
884 	drv->priv = NULL;
885 }
886 
i915_bo_import(struct bo * bo,struct drv_import_fd_data * data)887 static int i915_bo_import(struct bo *bo, struct drv_import_fd_data *data)
888 {
889 	int ret;
890 	struct drm_i915_gem_get_tiling gem_get_tiling = { 0 };
891 	struct i915_device *i915 = bo->drv->priv;
892 
893 	bo->meta.num_planes =
894 	    i915_num_planes_from_modifier(bo->drv, data->format, data->format_modifier);
895 
896 	ret = drv_prime_bo_import(bo, data);
897 	if (ret)
898 		return ret;
899 
900 	/* Set/Get tiling ioctl not supported  based on fence availability
901 	   Refer : "https://patchwork.freedesktop.org/patch/325343/"
902 	 */
903 	if (i915->num_fences_avail) {
904 		/* TODO(gsingh): export modifiers and get rid of backdoor tiling. */
905 		gem_get_tiling.handle = bo->handle.u32;
906 
907 		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_GET_TILING, &gem_get_tiling);
908 		if (ret) {
909 			drv_gem_bo_destroy(bo);
910 			drv_loge("DRM_IOCTL_I915_GEM_GET_TILING failed.\n");
911 			return ret;
912 		}
913 		bo->meta.tiling = gem_get_tiling.tiling_mode;
914 	}
915 	return 0;
916 }
917 
/*
 * Maps the BO for CPU access.
 *
 * CCS-compressed modifiers are rejected outright.  Linear (untiled) buffers
 * are tried via a CPU mapping first — the mmap_offset path when the kernel
 * supports it, the legacy GEM_MMAP ioctl otherwise.  Any tiled buffer, or a
 * CPU path that fails, falls back to a GTT mapping.
 *
 * Returns the mapped address and sets vma->length, or MAP_FAILED on error.
 */
static void *i915_bo_map(struct bo *bo, struct vma *vma, uint32_t map_flags)
{
	int ret;
	void *addr = MAP_FAILED;
	struct i915_device *i915 = bo->drv->priv;

	/* Buffers with auxiliary CCS data cannot be mapped directly. */
	if ((bo->meta.format_modifier == I915_FORMAT_MOD_Y_TILED_CCS) ||
	    (bo->meta.format_modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS) ||
	    (bo->meta.format_modifier == I915_FORMAT_MOD_4_TILED) ||
	    (bo->meta.format_modifier == I915_FORMAT_MOD_4_TILED_MTL_RC_CCS))
		return MAP_FAILED;

	if (bo->meta.tiling == I915_TILING_NONE) {
		if (i915->has_mmap_offset) {
			/* Newer kernels: ask for a write-back (WB) fake offset,
			 * then mmap it on the drm fd. */
			struct drm_i915_gem_mmap_offset gem_map = { 0 };
			gem_map.handle = bo->handle.u32;
			gem_map.flags = I915_MMAP_OFFSET_WB;

			/* Get the fake offset back */
			ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &gem_map);
			if (ret == 0)
				addr = mmap(0, bo->meta.total_size, drv_get_prot(map_flags),
					    MAP_SHARED, bo->drv->fd, gem_map.offset);
		} else {
			struct drm_i915_gem_mmap gem_map = { 0 };
			/* TODO(b/118799155): We don't seem to have a good way to
			 * detect the use cases for which WC mapping is really needed.
			 * The current heuristic seems overly coarse and may be slowing
			 * down some other use cases unnecessarily.
			 *
			 * For now, care must be taken not to use WC mappings for
			 * Renderscript and camera use cases, as they're
			 * performance-sensitive. */
			if ((bo->meta.use_flags & BO_USE_SCANOUT) &&
			    !(bo->meta.use_flags &
			      (BO_USE_RENDERSCRIPT | BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE)))
				gem_map.flags = I915_MMAP_WC;

			gem_map.handle = bo->handle.u32;
			gem_map.offset = 0;
			gem_map.size = bo->meta.total_size;

			ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_map);
			/* DRM_IOCTL_I915_GEM_MMAP mmaps the underlying shm
			 * file and returns a user space address directly, ie,
			 * doesn't go through mmap. If we try that on a
			 * dma-buf that doesn't have a shm file, i915.ko
			 * returns ENXIO.  Fall through to
			 * DRM_IOCTL_I915_GEM_MMAP_GTT in that case, which
			 * will mmap on the drm fd instead. */
			if (ret == 0)
				addr = (void *)(uintptr_t)gem_map.addr_ptr;
		}
	}

	/* Fallback: map through the GTT (also the only path for tiled BOs). */
	if (addr == MAP_FAILED) {
		struct drm_i915_gem_mmap_gtt gem_map = { 0 };

		gem_map.handle = bo->handle.u32;
		ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &gem_map);
		if (ret) {
			drv_loge("DRM_IOCTL_I915_GEM_MMAP_GTT failed\n");
			return MAP_FAILED;
		}

		addr = mmap(0, bo->meta.total_size, drv_get_prot(map_flags), MAP_SHARED,
			    bo->drv->fd, gem_map.offset);
	}

	if (addr == MAP_FAILED) {
		drv_loge("i915 GEM mmap failed\n");
		return addr;
	}

	vma->length = bo->meta.total_size;
	return addr;
}
995 
i915_bo_invalidate(struct bo * bo,struct mapping * mapping)996 static int i915_bo_invalidate(struct bo *bo, struct mapping *mapping)
997 {
998 	int ret;
999 	struct drm_i915_gem_set_domain set_domain = { 0 };
1000 
1001 	set_domain.handle = bo->handle.u32;
1002 	if (bo->meta.tiling == I915_TILING_NONE) {
1003 		set_domain.read_domains = I915_GEM_DOMAIN_CPU;
1004 		if (mapping->vma->map_flags & BO_MAP_WRITE)
1005 			set_domain.write_domain = I915_GEM_DOMAIN_CPU;
1006 	} else {
1007 		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1008 		if (mapping->vma->map_flags & BO_MAP_WRITE)
1009 			set_domain.write_domain = I915_GEM_DOMAIN_GTT;
1010 	}
1011 
1012 	ret = drmIoctl(bo->drv->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
1013 	if (ret) {
1014 		drv_loge("DRM_IOCTL_I915_GEM_SET_DOMAIN with %d\n", ret);
1015 		return ret;
1016 	}
1017 
1018 	return 0;
1019 }
1020 
i915_bo_flush(struct bo * bo,struct mapping * mapping)1021 static int i915_bo_flush(struct bo *bo, struct mapping *mapping)
1022 {
1023 	struct i915_device *i915 = bo->drv->priv;
1024 	if (!i915->has_llc && bo->meta.tiling == I915_TILING_NONE)
1025 		i915_clflush(mapping->vma->addr, mapping->vma->length);
1026 
1027 	return 0;
1028 }
1029 
/* Backend vtable registering the i915 entry points with the minigbm core;
 * destroy/unmap and format resolution use the generic drv helpers. */
const struct backend backend_i915 = {
	.name = "i915",
	.init = i915_init,
	.close = i915_close,
	.bo_compute_metadata = i915_bo_compute_metadata,
	.bo_create_from_metadata = i915_bo_create_from_metadata,
	.bo_destroy = drv_gem_bo_destroy,
	.bo_import = i915_bo_import,
	.bo_map = i915_bo_map,
	.bo_unmap = drv_bo_munmap,
	.bo_invalidate = i915_bo_invalidate,
	.bo_flush = i915_bo_flush,
	.resolve_format_and_use_flags = drv_resolve_format_and_use_flags_helper,
	.num_planes_from_modifier = i915_num_planes_from_modifier,
};
1045 
1046 #endif
1047