1 /*
2 * Copyright 2024 Intel Corporation
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "perf/xe/intel_perf.h"
7
8 #include <fcntl.h>
9 #include <sys/stat.h>
10
11 #include "perf/intel_perf.h"
12 #include "intel_perf_common.h"
13 #include "intel/common/intel_gem.h"
14
15 #include "drm-uapi/xe_drm.h"
16
17 #define FIELD_PREP_ULL(_mask, _val) (((_val) << (ffsll(_mask) - 1)) & (_mask))
18
xe_perf_get_oa_format(struct intel_perf_config * perf)19 uint64_t xe_perf_get_oa_format(struct intel_perf_config *perf)
20 {
21 uint64_t fmt;
22
23 if (perf->devinfo->verx10 >= 200) {
24 /* BSpec: 60942
25 * PEC64u64
26 */
27 fmt = FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, DRM_XE_OA_FMT_TYPE_PEC);
28 fmt |= FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, 1);
29 fmt |= FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, 1);
30 fmt |= FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_BC_REPORT, 0);
31 } else {
32 /* BSpec: 52198
33 * same as I915_OA_FORMAT_A24u40_A14u32_B8_C8 and
34 * I915_OA_FORMAT_A32u40_A4u32_B8_C8 returned for gfx 125+ and gfx 120
35 * respectively.
36 */
37 fmt = FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, DRM_XE_OA_FMT_TYPE_OAG);
38 fmt |= FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, 5);
39 fmt |= FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, 0);
40 fmt |= FIELD_PREP_ULL(DRM_XE_OA_FORMAT_MASK_BC_REPORT, 0);
41 }
42
43 return fmt;
44 }
45
46 bool
xe_oa_metrics_available(struct intel_perf_config * perf,int fd,bool use_register_snapshots)47 xe_oa_metrics_available(struct intel_perf_config *perf, int fd, bool use_register_snapshots)
48 {
49 bool perf_oa_available = false;
50 struct stat sb;
51
52 /* The existence of this file implies that this Xe KMD version supports
53 * observation interface.
54 */
55 if (stat("/proc/sys/dev/xe/observation_paranoid", &sb) == 0) {
56 uint64_t paranoid = 1;
57
58 /* Now we need to check if application has privileges to access observation
59 * interface.
60 *
61 * TODO: this approach does not takes into account applications running
62 * with CAP_PERFMON privileges.
63 */
64 read_file_uint64("/proc/sys/dev/xe/observation_paranoid", ¶noid);
65 if (paranoid == 0 || geteuid() == 0)
66 perf_oa_available = true;
67 }
68
69 if (!perf_oa_available)
70 return perf_oa_available;
71
72 perf->features_supported |= INTEL_PERF_FEATURE_HOLD_PREEMPTION;
73
74 return perf_oa_available;
75 }
76
77 uint64_t
xe_add_config(struct intel_perf_config * perf,int fd,const struct intel_perf_registers * config,const char * guid)78 xe_add_config(struct intel_perf_config *perf, int fd,
79 const struct intel_perf_registers *config,
80 const char *guid)
81 {
82 struct drm_xe_oa_config xe_config = {};
83 struct drm_xe_observation_param observation_param = {
84 .observation_type = DRM_XE_OBSERVATION_TYPE_OA,
85 .observation_op = DRM_XE_OBSERVATION_OP_ADD_CONFIG,
86 .param = (uintptr_t)&xe_config,
87 };
88 uint32_t *regs;
89 int ret;
90
91 memcpy(xe_config.uuid, guid, sizeof(xe_config.uuid));
92
93 xe_config.n_regs = config->n_mux_regs + config->n_b_counter_regs + config->n_flex_regs;
94 assert(xe_config.n_regs > 0);
95
96 regs = malloc(sizeof(uint64_t) * xe_config.n_regs);
97 xe_config.regs_ptr = (uintptr_t)regs;
98
99 memcpy(regs, config->mux_regs, config->n_mux_regs * sizeof(uint64_t));
100 regs += 2 * config->n_mux_regs;
101 memcpy(regs, config->b_counter_regs, config->n_b_counter_regs * sizeof(uint64_t));
102 regs += 2 * config->n_b_counter_regs;
103 memcpy(regs, config->flex_regs, config->n_flex_regs * sizeof(uint64_t));
104
105 ret = intel_ioctl(fd, DRM_IOCTL_XE_OBSERVATION, &observation_param);
106 free((void*)(uintptr_t)xe_config.regs_ptr);
107 return ret > 0 ? ret : 0;
108 }
109
110 void
xe_remove_config(struct intel_perf_config * perf,int fd,uint64_t config_id)111 xe_remove_config(struct intel_perf_config *perf, int fd, uint64_t config_id)
112 {
113 struct drm_xe_observation_param observation_param = {
114 .observation_type = DRM_XE_OBSERVATION_TYPE_OA,
115 .observation_op = DRM_XE_OBSERVATION_OP_REMOVE_CONFIG,
116 .param = (uintptr_t)&config_id,
117 };
118
119 intel_ioctl(fd, DRM_IOCTL_XE_OBSERVATION, &observation_param);
120 }
121
122 static void
oa_prop_set(struct drm_xe_ext_set_property * props,uint32_t * index,enum drm_xe_oa_property_id prop_id,uint64_t value)123 oa_prop_set(struct drm_xe_ext_set_property *props, uint32_t *index,
124 enum drm_xe_oa_property_id prop_id, uint64_t value)
125 {
126 if (*index > 0)
127 props[*index - 1].base.next_extension = (uintptr_t)&props[*index];
128
129 props[*index].base.name = DRM_XE_OA_EXTENSION_SET_PROPERTY;
130 props[*index].property = prop_id;
131 props[*index].value = value;
132 *index = *index + 1;
133 }
134
135 int
xe_perf_stream_open(struct intel_perf_config * perf_config,int drm_fd,uint32_t exec_id,uint64_t metrics_set_id,uint64_t report_format,uint64_t period_exponent,bool hold_preemption,bool enable)136 xe_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
137 uint32_t exec_id, uint64_t metrics_set_id,
138 uint64_t report_format, uint64_t period_exponent,
139 bool hold_preemption, bool enable)
140 {
141 struct drm_xe_ext_set_property props[DRM_XE_OA_PROPERTY_NO_PREEMPT + 1] = {};
142 struct drm_xe_observation_param observation_param = {
143 .observation_type = DRM_XE_OBSERVATION_TYPE_OA,
144 .observation_op = DRM_XE_OBSERVATION_OP_STREAM_OPEN,
145 .param = (uintptr_t)&props,
146 };
147 uint32_t i = 0;
148 int fd, flags;
149
150 if (exec_id)
151 oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID, exec_id);
152 oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_OA_DISABLED, !enable);
153 oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_SAMPLE_OA, true);
154 oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_OA_METRIC_SET, metrics_set_id);
155 oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_OA_FORMAT, report_format);
156 oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT, period_exponent);
157 if (hold_preemption)
158 oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_NO_PREEMPT, hold_preemption);
159
160 fd = intel_ioctl(drm_fd, DRM_IOCTL_XE_OBSERVATION, &observation_param);
161 if (fd < 0)
162 return fd;
163
164 flags = fcntl(fd, F_GETFL, 0);
165 flags |= O_CLOEXEC | O_NONBLOCK;
166 if (fcntl(fd, F_SETFL, flags)) {
167 close(fd);
168 return -1;
169 }
170
171 return fd;
172 }
173
174 int
xe_perf_stream_set_state(int perf_stream_fd,bool enable)175 xe_perf_stream_set_state(int perf_stream_fd, bool enable)
176 {
177 unsigned long uapi = enable ? DRM_XE_OBSERVATION_IOCTL_ENABLE :
178 DRM_XE_OBSERVATION_IOCTL_DISABLE;
179
180 return intel_ioctl(perf_stream_fd, uapi, 0);
181 }
182
183 int
xe_perf_stream_set_metrics_id(int perf_stream_fd,uint64_t metrics_set_id)184 xe_perf_stream_set_metrics_id(int perf_stream_fd, uint64_t metrics_set_id)
185 {
186 struct drm_xe_ext_set_property prop = {};
187 uint32_t index = 0;
188
189 oa_prop_set(&prop, &index, DRM_XE_OA_PROPERTY_OA_METRIC_SET,
190 metrics_set_id);
191 return intel_ioctl(perf_stream_fd, DRM_XE_OBSERVATION_IOCTL_CONFIG,
192 (void *)(uintptr_t)&prop);
193 }
194
195 static int
xe_perf_stream_read_error(int perf_stream_fd,uint8_t * buffer,size_t buffer_len)196 xe_perf_stream_read_error(int perf_stream_fd, uint8_t *buffer, size_t buffer_len)
197 {
198 struct drm_xe_oa_stream_status status = {};
199 struct intel_perf_record_header *header;
200 int ret;
201
202 ret = intel_ioctl(perf_stream_fd, DRM_XE_OBSERVATION_IOCTL_STATUS, &status);
203 if (ret)
204 return -errno;
205
206 header = (struct intel_perf_record_header *)buffer;
207 header->pad = 0;
208 header->type = 0;
209 header->size = sizeof(*header);
210 ret = header->size;
211
212 if (status.oa_status & INTEL_PERF_RECORD_TYPE_OA_BUFFER_LOST)
213 header->type = INTEL_PERF_RECORD_TYPE_OA_BUFFER_LOST;
214 else if (status.oa_status & DRM_XE_OASTATUS_REPORT_LOST)
215 header->type = INTEL_PERF_RECORD_TYPE_OA_REPORT_LOST;
216 else if (status.oa_status & DRM_XE_OASTATUS_COUNTER_OVERFLOW)
217 header->type = INTEL_PERF_RECORD_TYPE_COUNTER_OVERFLOW;
218 else if (status.oa_status & DRM_XE_OASTATUS_MMIO_TRG_Q_FULL)
219 header->type = INTEL_PERF_RECORD_TYPE_MMIO_TRG_Q_FULL;
220 else
221 unreachable("missing");
222
223 return header->type ? header->size : -1;
224 }
225
/* Read raw OA reports from the stream fd and repackage them in place so the
 * caller sees i915-perf style records: each sample prefixed with a
 * struct intel_perf_record_header.
 *
 * Returns the number of bytes of (header + sample) records written into
 * `buffer`, 0 on EOF, -ENOSPC if `buffer_len` cannot hold a single record,
 * or -errno on read failure. An EIO from the stream is converted into a
 * synthetic error record via xe_perf_stream_read_error().
 */
int
xe_perf_stream_read_samples(struct intel_perf_config *perf_config, int perf_stream_fd,
                            uint8_t *buffer, size_t buffer_len)
{
   const size_t sample_size = perf_config->oa_sample_size;
   const size_t sample_header_size = sample_size + sizeof(struct intel_perf_record_header);
   uint32_t num_samples = buffer_len / sample_header_size;
   /* Only read as many raw samples as still fit once each one gains a
    * header, so the repacking below cannot overflow `buffer`.
    */
   const size_t max_bytes_read = num_samples * sample_size;
   uint8_t *offset, *offset_samples;
   int len, i;

   if (buffer_len < sample_header_size)
      return -ENOSPC;

   /* Retry reads interrupted by signals. */
   do {
      len = read(perf_stream_fd, buffer, max_bytes_read);
   } while (len < 0 && errno == EINTR);

   if (len <= 0) {
      /* EIO signals a stream error state; report it as a record.
       * NOTE(review): when read() returns 0 this relies on errno from a
       * prior call — presumably the KMD always returns -1/EIO for errors;
       * confirm against the Xe OA uapi.
       */
      if (errno == EIO)
         return xe_perf_stream_read_error(perf_stream_fd, buffer, buffer_len);

      return len < 0 ? -errno : 0;
   }

   num_samples = len / sample_size;
   offset = buffer;
   offset_samples = buffer + (buffer_len - len);
   /* move all samples to the end of buffer */
   memmove(offset_samples, buffer, len);

   /* setup header, then copy sample from the end of buffer */
   for (i = 0; i < num_samples; i++) {
      struct intel_perf_record_header *header = (struct intel_perf_record_header *)offset;

      /* TODO: also append REPORT_LOST and BUFFER_LOST */
      header->type = INTEL_PERF_RECORD_TYPE_SAMPLE;
      header->pad = 0;
      header->size = sample_header_size;
      offset += sizeof(*header);

      /* Source (tail) and destination (head) regions can overlap as the
       * write cursor catches up, hence memmove rather than memcpy.
       */
      memmove(offset, offset_samples, sample_size);
      offset += sample_size;
      offset_samples += sample_size;
   }

   /* Total bytes of packaged records produced. */
   return offset - buffer;
}
274