1 /*
2 * Copyright 2018 The Chromium OS Authors. All rights reserved.
3 * Use of this source code is governed by a BSD-style license that can be
4 * found in the LICENSE file.
5 */
6
7 #ifdef DRV_MSM
8
9 #include <assert.h>
10 #include <dlfcn.h>
11 #include <drm_fourcc.h>
12 #include <errno.h>
13 #include <inttypes.h>
14 #include <msm_drm.h>
15 #include <stdbool.h>
16 #include <stdio.h>
17 #include <string.h>
18 #include <sys/mman.h>
19 #include <xf86drm.h>
20
21 #include "drv_helpers.h"
22 #include "drv_priv.h"
23 #include "util.h"
24
/* Alignment values are based on SDM845 Gfx IP */
#define DEFAULT_ALIGNMENT 64
#define BUFFER_SIZE_ALIGN 4096

/* Pitch/scanline alignment and trailing padding applied to NV12 and P010 buffers. */
#define VENUS_STRIDE_ALIGN 128
#define VENUS_SCANLINE_ALIGN 16
#define NV12_LINEAR_PADDING (12 * 1024)
/* The argument is parenthesized so that any expression may be passed safely. */
#define NV12_UBWC_PADDING(y_stride) (MAX(16 * 1024, ((y_stride) * 48)))
/* The UBWC meta data is padded to this many tiles in each direction. */
#define MACROTILE_WIDTH_ALIGN 64
#define MACROTILE_HEIGHT_ALIGN 16
#define PLANE_SIZE_ALIGN 4096

/* Value stored in bo->meta.tiling for buffers using the QCOM compressed modifier. */
#define MSM_UBWC_TILING 1
38
39 static const uint32_t render_target_formats[] = { DRM_FORMAT_ABGR8888, DRM_FORMAT_ARGB8888,
40 DRM_FORMAT_RGB565, DRM_FORMAT_XBGR8888,
41 DRM_FORMAT_XRGB8888, DRM_FORMAT_ABGR2101010,
42 DRM_FORMAT_ABGR16161616F };
43
44 static const uint32_t texture_source_formats[] = { DRM_FORMAT_NV12, DRM_FORMAT_R8,
45 DRM_FORMAT_YVU420, DRM_FORMAT_YVU420_ANDROID,
46 DRM_FORMAT_P010 };
47
48 /*
49 * Each macrotile consists of m x n (mostly 4 x 4) tiles.
50 * Pixel data pitch/stride is aligned with macrotile width.
51 * Pixel data height is aligned with macrotile height.
52 * Entire pixel data buffer is aligned with 4k(bytes).
53 */
get_ubwc_meta_size(uint32_t width,uint32_t height,uint32_t tile_width,uint32_t tile_height)54 static uint32_t get_ubwc_meta_size(uint32_t width, uint32_t height, uint32_t tile_width,
55 uint32_t tile_height)
56 {
57 uint32_t macrotile_width, macrotile_height;
58
59 macrotile_width = DIV_ROUND_UP(width, tile_width);
60 macrotile_height = DIV_ROUND_UP(height, tile_height);
61
62 // Align meta buffer width to 64 blocks
63 macrotile_width = ALIGN(macrotile_width, MACROTILE_WIDTH_ALIGN);
64
65 // Align meta buffer height to 16 blocks
66 macrotile_height = ALIGN(macrotile_height, MACROTILE_HEIGHT_ALIGN);
67
68 return ALIGN(macrotile_width * macrotile_height, PLANE_SIZE_ALIGN);
69 }
70
get_pitch_alignment(struct bo * bo)71 static unsigned get_pitch_alignment(struct bo *bo)
72 {
73 switch (bo->meta.format) {
74 case DRM_FORMAT_NV12:
75 return VENUS_STRIDE_ALIGN;
76 case DRM_FORMAT_P010:
77 return VENUS_STRIDE_ALIGN * 2;
78 case DRM_FORMAT_YVU420:
79 case DRM_FORMAT_YVU420_ANDROID:
80 /* TODO other YUV formats? */
81 /* Something (in the video stack?) assumes the U/V planes can use
82 * half the pitch as the Y plane.. to componsate, double the
83 * alignment:
84 */
85 return 2 * DEFAULT_ALIGNMENT;
86 default:
87 return DEFAULT_ALIGNMENT;
88 }
89 }
90
msm_calculate_layout(struct bo * bo)91 static void msm_calculate_layout(struct bo *bo)
92 {
93 uint32_t width, height;
94
95 width = bo->meta.width;
96 height = bo->meta.height;
97
98 /* NV12 format requires extra padding with platform
99 * specific alignments for venus driver
100 */
101 if (bo->meta.format == DRM_FORMAT_NV12 || bo->meta.format == DRM_FORMAT_P010) {
102 uint32_t y_stride, uv_stride, y_scanline, uv_scanline, y_plane, uv_plane, size,
103 extra_padding;
104
105 // P010 has the same layout as NV12. The difference is that each
106 // pixel in P010 takes 2 bytes, while in NV12 each pixel takes 1 byte.
107 if (bo->meta.format == DRM_FORMAT_P010)
108 width *= 2;
109
110 y_stride = ALIGN(width, get_pitch_alignment(bo));
111 uv_stride = ALIGN(width, get_pitch_alignment(bo));
112 y_scanline = ALIGN(height, VENUS_SCANLINE_ALIGN * 2);
113 uv_scanline = ALIGN(DIV_ROUND_UP(height, 2),
114 VENUS_SCANLINE_ALIGN * (bo->meta.tiling ? 2 : 1));
115 y_plane = y_stride * y_scanline;
116 uv_plane = uv_stride * uv_scanline;
117
118 if (bo->meta.tiling == MSM_UBWC_TILING) {
119 y_plane = ALIGN(y_plane, PLANE_SIZE_ALIGN);
120 uv_plane = ALIGN(uv_plane, PLANE_SIZE_ALIGN);
121 y_plane += get_ubwc_meta_size(width, height, 32, 8);
122 uv_plane += get_ubwc_meta_size(width >> 1, height >> 1, 16, 8);
123 extra_padding = NV12_UBWC_PADDING(y_stride);
124 } else {
125 extra_padding = NV12_LINEAR_PADDING;
126 }
127
128 bo->meta.strides[0] = y_stride;
129 bo->meta.sizes[0] = y_plane;
130 bo->meta.offsets[1] = y_plane;
131 bo->meta.strides[1] = uv_stride;
132 size = y_plane + uv_plane + extra_padding;
133 bo->meta.total_size = ALIGN(size, BUFFER_SIZE_ALIGN);
134 bo->meta.sizes[1] = bo->meta.total_size - bo->meta.sizes[0];
135 } else {
136 uint32_t stride, alignw, alignh;
137
138 alignw = ALIGN(width, get_pitch_alignment(bo));
139 /* HAL_PIXEL_FORMAT_YV12 requires that the buffer's height not be aligned.
140 DRM_FORMAT_R8 of height one is used for JPEG camera output, so don't
141 height align that. */
142 if (bo->meta.format == DRM_FORMAT_YVU420_ANDROID ||
143 bo->meta.format == DRM_FORMAT_YVU420 ||
144 (bo->meta.format == DRM_FORMAT_R8 && height == 1)) {
145 assert(bo->meta.tiling != MSM_UBWC_TILING);
146 alignh = height;
147 } else {
148 alignh = ALIGN(height, DEFAULT_ALIGNMENT);
149 }
150
151 stride = drv_stride_from_format(bo->meta.format, alignw, 0);
152
153 /* Calculate size and assign stride, size, offset to each plane based on format */
154 drv_bo_from_format(bo, stride, 1, alignh, bo->meta.format);
155 if (bo->meta.format == DRM_FORMAT_YVU420_ANDROID ||
156 bo->meta.format == DRM_FORMAT_YVU420) {
157 const uint32_t u_size =
158 drv_size_from_format(bo->meta.format, bo->meta.strides[2], alignh, 2);
159 const uint32_t padding = ALIGN(u_size, PLANE_SIZE_ALIGN) - u_size;
160 bo->meta.total_size += padding;
161 }
162
163 /* For all RGB UBWC formats */
164 if (bo->meta.tiling == MSM_UBWC_TILING) {
165 bo->meta.sizes[0] += get_ubwc_meta_size(width, height, 16, 4);
166 bo->meta.total_size = bo->meta.sizes[0];
167 assert(IS_ALIGNED(bo->meta.total_size, BUFFER_SIZE_ALIGN));
168 }
169 }
170 }
171
is_ubwc_fmt(uint32_t format)172 static bool is_ubwc_fmt(uint32_t format)
173 {
174 switch (format) {
175 case DRM_FORMAT_XBGR8888:
176 case DRM_FORMAT_ABGR8888:
177 case DRM_FORMAT_XRGB8888:
178 case DRM_FORMAT_ARGB8888:
179 #ifndef QCOM_DISABLE_COMPRESSED_NV12
180 case DRM_FORMAT_NV12:
181 #endif
182 return 1;
183 default:
184 return 0;
185 }
186 }
187
msm_add_ubwc_combinations(struct driver * drv,const uint32_t * formats,uint32_t num_formats,struct format_metadata * metadata,uint64_t use_flags)188 static void msm_add_ubwc_combinations(struct driver *drv, const uint32_t *formats,
189 uint32_t num_formats, struct format_metadata *metadata,
190 uint64_t use_flags)
191 {
192 for (uint32_t i = 0; i < num_formats; i++) {
193 if (is_ubwc_fmt(formats[i])) {
194 struct combination combo = { .format = formats[i],
195 .metadata = *metadata,
196 .use_flags = use_flags };
197 drv_array_append(drv->combos, &combo);
198 }
199 }
200 }
201
202 /**
203 * Check for buggy apps that are known to not support modifiers, to avoid surprising them
204 * with a UBWC buffer.
205 */
should_avoid_ubwc(void)206 static bool should_avoid_ubwc(void)
207 {
208 #ifndef __ANDROID__
209 /* waffle is buggy and, requests a renderable buffer (which on qcom platforms, we
210 * want to use UBWC), and then passes it to the kernel discarding the modifier.
211 * So mesa ends up correctly rendering to as tiled+compressed, but kernel tries
212 * to display as linear. Other platforms do not see this issue, simply because
213 * they only use compressed (ex, AFBC) with the BO_USE_SCANOUT flag.
214 *
215 * See b/163137550
216 */
217 if (dlsym(RTLD_DEFAULT, "waffle_display_connect")) {
218 drv_logi("WARNING: waffle detected, disabling UBWC\n");
219 return true;
220 }
221 #endif
222 return false;
223 }
224
msm_init(struct driver * drv)225 static int msm_init(struct driver *drv)
226 {
227 struct format_metadata metadata;
228 uint64_t render_use_flags = BO_USE_RENDER_MASK | BO_USE_SCANOUT;
229 uint64_t texture_use_flags = BO_USE_TEXTURE_MASK | BO_USE_HW_VIDEO_DECODER;
230 /*
231 * NOTE: we actually could use tiled in the BO_USE_FRONT_RENDERING case,
232 * if we had a modifier for tiled-but-not-compressed. But we *cannot* use
233 * compressed in this case because the UBWC flags/meta data can be out of
234 * sync with pixel data while the GPU is writing a frame out to memory.
235 */
236 uint64_t sw_flags =
237 (BO_USE_RENDERSCRIPT | BO_USE_SW_MASK | BO_USE_LINEAR | BO_USE_FRONT_RENDERING);
238
239 drv_add_combinations(drv, render_target_formats, ARRAY_SIZE(render_target_formats),
240 &LINEAR_METADATA, render_use_flags);
241
242 drv_add_combinations(drv, texture_source_formats, ARRAY_SIZE(texture_source_formats),
243 &LINEAR_METADATA, texture_use_flags);
244
245 /* The camera stack standardizes on NV12 for YUV buffers. */
246 /* YVU420 and NV12 formats for camera, display and encoding. */
247 drv_modify_combination(drv, DRM_FORMAT_NV12, &LINEAR_METADATA,
248 BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_SCANOUT |
249 BO_USE_HW_VIDEO_ENCODER);
250
251 /*
252 * R8 format is used for Android's HAL_PIXEL_FORMAT_BLOB and is used for JPEG snapshots
253 * from camera and input/output from hardware decoder/encoder.
254 */
255 drv_modify_combination(drv, DRM_FORMAT_R8, &LINEAR_METADATA,
256 BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE | BO_USE_HW_VIDEO_DECODER |
257 BO_USE_HW_VIDEO_ENCODER | BO_USE_GPU_DATA_BUFFER |
258 BO_USE_SENSOR_DIRECT_DATA);
259
260 /*
261 * Android also frequently requests YV12 formats for some camera implementations
262 * (including the external provider implmenetation).
263 */
264 drv_modify_combination(drv, DRM_FORMAT_YVU420_ANDROID, &LINEAR_METADATA,
265 BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE);
266 drv_modify_combination(drv, DRM_FORMAT_YVU420, &LINEAR_METADATA,
267 BO_USE_CAMERA_READ | BO_USE_CAMERA_WRITE);
268
269 /* Android CTS tests require this. */
270 drv_add_combination(drv, DRM_FORMAT_BGR888, &LINEAR_METADATA, BO_USE_SW_MASK);
271
272 #ifdef SC_7280
273 drv_modify_combination(drv, DRM_FORMAT_P010, &LINEAR_METADATA,
274 BO_USE_SCANOUT | BO_USE_HW_VIDEO_ENCODER);
275 #endif
276
277 drv_modify_linear_combinations(drv);
278
279 if (should_avoid_ubwc() || !drv->compression)
280 return 0;
281
282 metadata.tiling = MSM_UBWC_TILING;
283 metadata.priority = 2;
284 metadata.modifier = DRM_FORMAT_MOD_QCOM_COMPRESSED;
285
286 render_use_flags &= ~sw_flags;
287 texture_use_flags &= ~sw_flags;
288
289 msm_add_ubwc_combinations(drv, render_target_formats, ARRAY_SIZE(render_target_formats),
290 &metadata, render_use_flags);
291
292 msm_add_ubwc_combinations(drv, texture_source_formats, ARRAY_SIZE(texture_source_formats),
293 &metadata, texture_use_flags);
294
295 drv_modify_combination(drv, DRM_FORMAT_NV12, &metadata,
296 BO_USE_SCANOUT | BO_USE_HW_VIDEO_ENCODER);
297
298 return 0;
299 }
300
msm_bo_create_for_modifier(struct bo * bo,uint32_t width,uint32_t height,uint32_t format,const uint64_t modifier)301 static int msm_bo_create_for_modifier(struct bo *bo, uint32_t width, uint32_t height,
302 uint32_t format, const uint64_t modifier)
303 {
304 struct drm_msm_gem_new req = { 0 };
305 int ret;
306
307 bo->meta.tiling = (modifier == DRM_FORMAT_MOD_QCOM_COMPRESSED) ? MSM_UBWC_TILING : 0;
308 msm_calculate_layout(bo);
309
310 req.flags = MSM_BO_WC | MSM_BO_SCANOUT;
311 req.size = bo->meta.total_size;
312
313 ret = drmIoctl(bo->drv->fd, DRM_IOCTL_MSM_GEM_NEW, &req);
314 if (ret) {
315 drv_loge("DRM_IOCTL_MSM_GEM_NEW failed with %s\n", strerror(errno));
316 return -errno;
317 }
318
319 bo->handle.u32 = req.handle;
320
321 bo->meta.format_modifier = modifier;
322 return 0;
323 }
324
msm_bo_create_with_modifiers(struct bo * bo,uint32_t width,uint32_t height,uint32_t format,const uint64_t * modifiers,uint32_t count)325 static int msm_bo_create_with_modifiers(struct bo *bo, uint32_t width, uint32_t height,
326 uint32_t format, const uint64_t *modifiers, uint32_t count)
327 {
328 static const uint64_t modifier_order[] = {
329 DRM_FORMAT_MOD_QCOM_COMPRESSED,
330 DRM_FORMAT_MOD_LINEAR,
331 };
332
333 uint64_t modifier =
334 drv_pick_modifier(modifiers, count, modifier_order, ARRAY_SIZE(modifier_order));
335
336 if (!bo->drv->compression && modifier == DRM_FORMAT_MOD_QCOM_COMPRESSED)
337 modifier = DRM_FORMAT_MOD_LINEAR;
338
339 return msm_bo_create_for_modifier(bo, width, height, format, modifier);
340 }
341
342 /* msm_bo_create will create linear buffers for now */
msm_bo_create(struct bo * bo,uint32_t width,uint32_t height,uint32_t format,uint64_t flags)343 static int msm_bo_create(struct bo *bo, uint32_t width, uint32_t height, uint32_t format,
344 uint64_t flags)
345 {
346 struct combination *combo = drv_get_combination(bo->drv, format, flags);
347
348 if (!combo) {
349 drv_loge("invalid format = %d, flags = %" PRIx64 " combination\n", format, flags);
350 return -EINVAL;
351 }
352
353 return msm_bo_create_for_modifier(bo, width, height, format, combo->metadata.modifier);
354 }
355
msm_bo_map(struct bo * bo,struct vma * vma,uint32_t map_flags)356 static void *msm_bo_map(struct bo *bo, struct vma *vma, uint32_t map_flags)
357 {
358 int ret;
359 struct drm_msm_gem_info req = { 0 };
360
361 if (bo->meta.format_modifier)
362 return MAP_FAILED;
363
364 req.handle = bo->handle.u32;
365 ret = drmIoctl(bo->drv->fd, DRM_IOCTL_MSM_GEM_INFO, &req);
366 if (ret) {
367 drv_loge("DRM_IOCLT_MSM_GEM_INFO failed with %s\n", strerror(errno));
368 return MAP_FAILED;
369 }
370 vma->length = bo->meta.total_size;
371
372 return mmap(0, bo->meta.total_size, drv_get_prot(map_flags), MAP_SHARED, bo->drv->fd,
373 req.offset);
374 }
375
376 const struct backend backend_msm = {
377 .name = "msm",
378 .init = msm_init,
379 .bo_create = msm_bo_create,
380 .bo_create_with_modifiers = msm_bo_create_with_modifiers,
381 .bo_destroy = drv_gem_bo_destroy,
382 .bo_import = drv_prime_bo_import,
383 .bo_map = msm_bo_map,
384 .bo_unmap = drv_bo_munmap,
385 .resolve_format_and_use_flags = drv_resolve_format_and_use_flags_helper,
386 };
387 #endif /* DRV_MSM */
388