1 /*
2 * Copyright (c) 2019, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 // PatchInfo linker.
24 //
25
26 #include <cassert>
27 #include <cstddef>
28 #include <cstdint>
29 #include <cstdio>
30
31 #include "cm_fc_ld.h"
32
33 #include "DepGraph.h"
34 #ifdef __WITH_IGA__
35 #include "IGAAutoDep.h"
36 #endif
37 #include "PatchInfoLinker.h"
38 #include "PatchInfoReader.h"
39
40 #ifndef __WITH_IGA__
41 namespace cm {
42 namespace toolchain {
43
/// Stand-in for the policy-2 dependency resolver, compiled only when
/// __WITH_IGA__ is not defined.
///
/// \param P    Platform identifier; ignored by this stub.
/// \param Bin  Start of the linked binary.
/// \param Sz   Number of bytes readable at \p Bin.
/// \returns a pair whose first member is the status 0 and whose second
///          member is an unmodified copy of the \p Sz input bytes.
std::pair<int, std::string>
resolvDep(unsigned P, const char *Bin, std::size_t Sz) {
  (void)P; // The stub performs no platform-specific work.
  return {0, std::string(Bin, Sz)};
}
50
51 } // End namespace toolchain
52 } // End namespace cm
53 #endif
54
55 namespace {
56
57 class PatchInfoLinker {
58 std::size_t NumKernels;
59 cm_fc_kernel_t *Kernels;
60
61 const char *Options;
62
63 std::string Linked;
64
65 unsigned Policy;
66 unsigned r127Token;
67 bool hasR127Token;
68
69 unsigned Platform;
70
71 public:
PatchInfoLinker(std::size_t NK,cm_fc_kernel_t * K,const char * O=nullptr)72 PatchInfoLinker(std::size_t NK, cm_fc_kernel_t *K, const char *O = nullptr)
73 : NumKernels(NK), Kernels(K), Options(O),
74 Policy(cm::patch::DepGraph::SWSB_POLICY_1),
75 r127Token(-1),
76 hasR127Token(false),
77 Platform(0) {
78 parseOptions();
79 }
80
81 bool link(cm::patch::Collection &C);
82
83 protected:
84 unsigned align(unsigned);
85 unsigned writeNOP(unsigned);
86 unsigned writeEOT();
87
parseOptions()88 void parseOptions() {
89 std::string Opt;
90 if (Options)
91 Opt = Options;
92
93 if (Opt.empty())
94 return;
95
96 std::string::size_type pos = 0;
97 do {
98 pos = Opt.find_first_not_of(':', pos);
99 if (pos == std::string::npos)
100 break;
101 switch (Opt[pos]) {
102 default:
103 break;
104 case 'p':
105 ++pos;
106 if (pos >= Opt.size())
107 break;
108 switch (Opt[pos]) {
109 default:
110 break;
111 case '0':
112 Policy = cm::patch::DepGraph::SWSB_POLICY_0;
113 ++pos;
114 break;
115 case '1':
116 Policy = cm::patch::DepGraph::SWSB_POLICY_1;
117 ++pos;
118 break;
119 case '2':
120 Policy = cm::patch::DepGraph::SWSB_POLICY_2;
121 ++pos;
122 break;
123 }
124 }
125 if (pos >= Opt.size())
126 break;
127 pos = Opt.find_first_of(':', pos);
128 if (pos == std::string::npos)
129 break;
130 ++pos;
131 } while (pos < Opt.size());
132 }
133
134 unsigned writeSync(unsigned RdMask, unsigned WrMask);
135 };
136
137 } // End anonymous namespace
138
linkPatchInfo(cm::patch::Collection & C,std::size_t NumKernels,cm_fc_kernel_t * Kernels,const char * Options)139 bool linkPatchInfo(cm::patch::Collection &C,
140 std::size_t NumKernels, cm_fc_kernel_t *Kernels,
141 const char *Options) {
142 PatchInfoLinker LD(NumKernels, Kernels, Options);
143 return LD.link(C);
144 }
145
link(cm::patch::Collection & C)146 bool PatchInfoLinker::link(cm::patch::Collection &C) {
147 for (unsigned i = 0, e = unsigned(NumKernels); i != e; ++i)
148 if (readPatchInfo(Kernels[i].patch_buf, Kernels[i].patch_size, C))
149 return true;
150
151 Platform = C.getPlatform();
152
153 std::map<cm::patch::Binary *, cm::patch::Symbol *> BinMap;
154 // Setup mapping from binary to symbol.
155 for (auto I = C.sym_begin(), E = C.sym_end(); I != E; ++I) {
156 // Bail out if there's unresolved symbol.
157 if (I->isUnresolved())
158 return true;
159 if (I->getAddr() == 0) {
160 BinMap[I->getBinary()] = &*I;
161 I->getBinary()->setName(&*I);
162 }
163 }
164
165 // Associate separate binaries and find the last top-level kernel.
166 cm::patch::Binary *LastTopBin = nullptr;
167 unsigned n = 0;
168 for (auto I = C.bin_begin(), E = C.bin_end(); I != E; ++I) {
169 auto &B = *I;
170 if (n >= NumKernels)
171 return true;
172 B.setData(Kernels[n].binary_buf);
173 B.setSize(Kernels[n].binary_size);
174 B.clearSyncPoints();
175 ++n;
176 // Check link type through its symbol.
177 auto M = BinMap.find(&B);
178 if (M == BinMap.end()) // Bail out if there's binary without symbol name.
179 return true;
180 auto S = M->second;
181 B.setLinkType(S->getExtra() & 0x3);
182 if (B.getLinkType() != CM_FC_LINK_TYPE_CALLEE)
183 {
184 LastTopBin = &B;
185 for (auto RI = LastTopBin->finireg_begin(),
186 RE = LastTopBin->finireg_end(); RI != RE; ++RI) {
187 // Skip use-only access without token associated.
188 if (RI->isDefNotByToken())
189 continue;
190 unsigned Reg = RI->getRegNo();
191 if (Reg == 127)
192 {
193 std::tie(hasR127Token, r127Token) = RI->getToken();
194 }
195 }
196 }
197 }
198 // Bail out if there is mismatch.
199 if (n != NumKernels)
200 return true;
201
202 cm::patch::DepGraph DG(C, Policy);
203 DG.build();
204 DG.resolve();
205
206 // Link all kernels into 'linked' buffer.
207 Linked.clear();
208 for (auto I = C.bin_begin(), E = C.bin_end(); I != E; ++I) {
209 auto Bin = &*I;
210 align(4); // Align to 16B, i.e. 1 << 4.
211 // Real binary starts from here.
212 Bin->setPos(unsigned(Linked.size()));
213 Bin->sortSyncPoints();
214 unsigned Start = 0;
215 unsigned Inserted = 0;
216 for (auto SI = Bin->sp_begin(), SE = Bin->sp_end(); SI != SE; ++SI) {
217 auto Node = *SI;
218 unsigned Offset = Node->getOffset();
219 assert(Start <= Offset && "Invalid insert point!");
220 if (Start < Offset) {
221 Linked.append(Bin->getData() + Start, Offset - Start);
222 // Adjust relocation in this range.
223 for (auto RI = Bin->rel_begin(), RE = Bin->rel_end(); RI != RE; ++RI) {
224 unsigned RelOff = RI->getOffset();
225 if (Start <= RelOff && RelOff < Offset)
226 RI->setOffset(RelOff + Inserted);
227 }
228 }
229 Start = Offset;
230 if (Platform < cm::patch::PP_PVC)
231 {
232 Inserted += writeSync(Node->getRdTokenMask(), Node->getWrTokenMask());
233 }
234 }
235 Linked.append(Bin->getData() + Start, Bin->getSize() - Start);
236 for (auto RI = Bin->rel_begin(), RE = Bin->rel_end(); RI != RE; ++RI) {
237 unsigned RelOff = RI->getOffset();
238 if (Start <= RelOff && RelOff < Bin->getSize())
239 RI->setOffset(RelOff + Inserted);
240 }
241 if (Bin == LastTopBin)
242 writeEOT();
243 }
244 writeNOP(64);
245
246 // Fix relocations.
247 for (auto I = C.bin_begin(), E = C.bin_end(); I != E; ++I) {
248 auto Bin = &*I;
249 unsigned Offset = Bin->getPos();
250 // Bail out if this binary is not in the position map.
251 if (Offset == unsigned(-1))
252 return true;
253 for (auto RI = Bin->rel_begin(), RE = Bin->rel_end(); RI != RE; ++RI) {
254 auto S = RI->getSymbol();
255 auto Target = S->getBinary();
256 unsigned TargetOffset = Target->getPos();
257 // Bail out if this binary is not in the position map.
258 if (TargetOffset == unsigned(-1))
259 return true;
260 unsigned AbsIP = Offset + RI->getOffset();
261 unsigned AbsJIP = TargetOffset + S->getAddr();
262 int Imm = AbsJIP - AbsIP;
263 uint32_t *p = reinterpret_cast<uint32_t *>(&Linked[AbsIP]);
264 p[3] = Imm;
265 ++n;
266 }
267 }
268
269 if (Policy == cm::patch::DepGraph::SWSB_POLICY_2) {
270 std::string Out;
271 int Ret;
272 std::tie(Ret, Out) =
273 cm::toolchain::resolvDep(Platform, Linked.data(), Linked.size());
274 if (Ret < 0)
275 return true;
276 Linked.swap(Out);
277 }
278
279 C.setLinkedBinary(std::move(Linked));
280
281 return false;
282 }
283
align(unsigned Align)284 unsigned PatchInfoLinker::align(unsigned Align) {
285 unsigned A = (1U << Align);
286 unsigned Origin = unsigned(Linked.size());
287 unsigned Padding = ((Origin + A - 1) / A) * A;
288 return writeNOP(Padding - Origin);
289 }
290
291 /// Append Linked with 'nop' taking up to N bytes.
writeNOP(unsigned N)292 unsigned PatchInfoLinker::writeNOP(unsigned N) {
293 // Bail out if N is not aligned with 8B, i.e. 64 bits. That's the minimal
294 // size of 'nop' instruction.
295 if (N % 8 != 0)
296 return 0;
297 uint64_t regular_nop = 0;
298 uint64_t compact_nop = 0;
299 switch (Platform) {
300 case cm::patch::PP_TGL:
301 case cm::patch::PP_DG2:
302 case cm::patch::PP_PVC:
303 case cm::patch::PP_ELF:
304 regular_nop = 0x00000060U;
305 compact_nop = 0x20000060U;
306 break;
307 default:
308 regular_nop = 0x0000007eU;
309 compact_nop = 0x2000007eU;
310 break;
311 }
312 unsigned B = 0;
313 while (N > 8) {
314 Linked.append(reinterpret_cast<char *>(®ular_nop), sizeof(regular_nop));
315 Linked.append(sizeof(uint64_t), 0);
316 N -= 16;
317 B += 16;
318 }
319 while (N > 0) {
320 Linked.append(reinterpret_cast<char *>(&compact_nop), sizeof(compact_nop));
321 N -= 8;
322 B += 8;
323 }
324 return B;
325 }
326
writeEOT()327 unsigned PatchInfoLinker::writeEOT() {
328 uint64_t mov0 = 0;
329 uint64_t mov1 = 0;
330 uint64_t snd0 = 0;
331 uint64_t snd1 = 0;
332 uint64_t r127_sync0 = 0;
333 uint64_t r127_sync1 = 0;
334 uint64_t r127_1_sync0 = 0;
335 uint64_t r127_1_sync1 = 0;
336
337 switch (Platform) {
338 case cm::patch::PP_TGL:
339 {
340 if (hasR127Token)
341 {
342 uint8_t *sync0Ptr = (uint8_t *)&r127_sync0;
343 r127_sync0 = 0x0001000000002001ULL;
344 r127_sync1 = 0x0000000000000000ULL;
345 sync0Ptr[1] |= (uint8_t)r127Token;
346 }
347 mov0 = 0x7f050aa080030161ULL;
348 mov1 = 0x0000000000460005ULL;
349 snd0 = 0x0000000400000131ULL;
350 snd1 = 0x0000000070207f0cULL;
351 }
352 break;
353 case cm::patch::PP_DG2:
354 {
355 if (hasR127Token)
356 {
357 uint8_t *sync0Ptr = (uint8_t *)&r127_sync0;
358 r127_sync0 = 0x0001000000002001ULL;
359 r127_sync1 = 0x0000000000000000ULL;
360 sync0Ptr[1] |= (uint8_t)r127Token;
361 }
362 mov0 = 0x7f050aa080030961ULL;
363 mov1 = 0x0000000000100004ULL;
364 snd0 = 0x0000000480000931ULL;
365 snd1 = 0x0000000030207f0cULL;
366 }
367 break;
368 case cm::patch::PP_PVC:
369 {
370 if (hasR127Token)
371 {
372 r127_sync0 = 0x0001000000008001ULL;
373 r127_sync1 = 0x0000000000000000ULL;
374 r127_1_sync0 = 0x000100000000A001ULL;
375 r127_1_sync1 = 0x0000000000000000ULL;
376 uint8_t* sync0Ptr = (uint8_t*)& r127_sync0;
377 sync0Ptr[1] |= (uint8_t)r127Token;
378 sync0Ptr = (uint8_t*)& r127_1_sync0;
379 sync0Ptr[1] |= (uint8_t)r127Token;
380 }
381 mov0 = 0x7f050aa0800c0961ULL;
382 mov1 = 0x0000000000100004ULL;
383 snd0 = 0x00000004800c0931ULL;
384 snd1 = 0x0000000030207f0cULL;
385 }
386 break;
387 case cm::patch::PP_ELF:
388 {
389 if (hasR127Token)
390 {
391 r127_sync0 = 0x0001000000008001ULL;
392 r127_sync1 = 0x0000000000000000ULL;
393 r127_1_sync0 = 0x000100000000A001ULL;
394 r127_1_sync1 = 0x0000000000000000ULL;
395 uint8_t* sync0Ptr = (uint8_t*)&r127_sync0;
396 sync0Ptr[1] |= (uint8_t)r127Token;
397 sync0Ptr = (uint8_t*)&r127_1_sync0;
398 sync0Ptr[1] |= (uint8_t)r127Token;
399 }
400
401 mov0 = 0x7f050aa0800c0961ULL;
402 mov1 = 0x0000000000100004ULL;
403 snd0 = 0x00000004800c0931ULL;
404 snd1 = 0x0000000030207f0cULL;
405 }
406 break;
407 default:
408 mov0 = 0x2fe0020c00600001ULL;
409 mov1 = 0x00000000008d0000ULL;
410 snd0 = 0x2000020007000031ULL;
411 snd1 = 0x8200001006000fe0ULL;
412 break;
413 }
414 unsigned B = 0;
415 if (hasR127Token)
416 {
417 Linked.append(reinterpret_cast<char *>(&r127_sync0), sizeof(r127_sync0));
418 B += sizeof(r127_sync0);
419 Linked.append(reinterpret_cast<char *>(&r127_sync1), sizeof(r127_sync1));
420 B += sizeof(r127_sync1);
421 if (Platform == cm::patch::PP_PVC)
422 {
423 Linked.append(reinterpret_cast<char*>(&r127_1_sync0), sizeof(r127_1_sync0));
424 B += sizeof(r127_1_sync0);
425 Linked.append(reinterpret_cast<char*>(&r127_1_sync1), sizeof(r127_1_sync1));
426 B += sizeof(r127_1_sync1);
427 }
428 }
429 Linked.append(reinterpret_cast<char *>(&mov0), sizeof(mov0));
430 B += sizeof(mov0);
431 Linked.append(reinterpret_cast<char *>(&mov1), sizeof(mov1));
432 B += sizeof(mov1);
433 Linked.append(reinterpret_cast<char *>(&snd0), sizeof(snd0));
434 B += sizeof(snd0);
435 Linked.append(reinterpret_cast<char *>(&snd1), sizeof(snd1));
436 B += sizeof(snd1);
437
438 return B;
439 }
440
441
writeSync(unsigned RdMask,unsigned WrMask)442 unsigned PatchInfoLinker::writeSync(unsigned RdMask, unsigned WrMask) {
443 uint64_t sysrd0 = 0x0001000000000101;
444 uint64_t sysrd1 = 0x0000000020000000;
445 uint64_t syswr0 = 0x0001000000000001;
446 uint64_t syswr1 = 0x0000000030000000;
447
448 // Force 1 distance to sync in-order instruction.
449 uint64_t swsb_mask = 0x000000000000FF00;
450
451 // Subfuncs on qword1.
452 uint64_t fc_mask = 0x00000000F0000000;
453 uint64_t nop = 0x0000000000000000;
454 uint64_t allrd = 0x0000000020000000;
455 uint64_t allwr = 0x0000000030000000;
456
457 uint64_t dist = 1;
458 unsigned B = 0;
459
460 if (RdMask == unsigned(-1)) {
461 uint64_t qw0 = (sysrd0 & ~swsb_mask) | (dist << 8);
462 uint64_t qw1 = (sysrd1 & ~fc_mask) | allrd;
463 Linked.append(reinterpret_cast<char *>(&qw0), sizeof(qw0));
464 B += sizeof(qw0);
465 Linked.append(reinterpret_cast<char *>(&qw1), sizeof(qw1));
466 B += sizeof(qw1);
467 // Clear distance.
468 dist = 0;
469 } else {
470 for (unsigned Tok = 0; RdMask != 0; RdMask >>= 1, ++Tok) {
471 if (RdMask & 1) {
472 uint64_t swsb = 0x30 | Tok;
473 uint64_t qw0 = (sysrd0 & ~swsb_mask) | (swsb << 8);
474 uint64_t qw1 = (sysrd1 & ~fc_mask) | nop;
475 Linked.append(reinterpret_cast<char *>(&qw0), sizeof(qw0));
476 B += sizeof(qw0);
477 Linked.append(reinterpret_cast<char *>(&qw1), sizeof(qw1));
478 B += sizeof(qw1);
479 }
480 }
481 }
482 if (WrMask == unsigned(-1)) {
483 uint64_t qw0 = (sysrd0 & ~swsb_mask) | (dist << 8);
484 uint64_t qw1 = (sysrd1 & ~fc_mask) | allwr;
485 Linked.append(reinterpret_cast<char *>(&qw0), sizeof(qw0));
486 B += sizeof(qw0);
487 Linked.append(reinterpret_cast<char *>(&qw1), sizeof(qw1));
488 B += sizeof(qw1);
489 // Clear distance.
490 dist = 0;
491 } else {
492 for (unsigned Tok = 0; WrMask != 0; WrMask >>= 1, ++Tok) {
493 if (WrMask & 1) {
494 uint64_t swsb = 0x80 | Tok | (dist << 4);
495 uint64_t qw0 = (sysrd0 & ~swsb_mask) | (swsb << 8);
496 uint64_t qw1 = (sysrd1 & ~fc_mask) | nop;
497 Linked.append(reinterpret_cast<char *>(&qw0), sizeof(qw0));
498 B += sizeof(qw0);
499 Linked.append(reinterpret_cast<char *>(&qw1), sizeof(qw1));
500 B += sizeof(qw1);
501 // Clear distance.
502 dist = 0;
503 }
504 }
505 }
506
507 return B;
508 }
509