xref: /aosp_15_r20/external/intel-media-driver/media_softlet/agnostic/common/vp/cm_fc_ld/PatchInfoLinker.cpp (revision ba62d9d3abf0e404f2022b4cd7a85e107f48596f)
1 /*
2 * Copyright (c) 2019, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 
23 // PatchInfo linker.
24 //
25 
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <map>
#include <string>
#include <tuple>
#include <utility>

#include "cm_fc_ld.h"

#include "DepGraph.h"
#ifdef __WITH_IGA__
#include "IGAAutoDep.h"
#endif
#include "PatchInfoLinker.h"
#include "PatchInfoReader.h"
39 
40 #ifndef __WITH_IGA__
41 namespace cm {
42 namespace toolchain {
43 
// Dummy hook for policy 2, used when the build has no IGA support: the
// binary is handed back untouched.
//
// P   - platform identifier; not used by this stub.
// Bin - first byte of the binary.
// Sz  - number of bytes available at Bin.
//
// Returns the status code 0 paired with a copy of the Sz bytes at Bin.
std::pair<int, std::string>
resolvDep(unsigned P, const char *Bin, std::size_t Sz) {
  (void)P;
  return {0, std::string(Bin, Sz)};
}
50 
51 } // End namespace toolchain
52 } // End namespace cm
53 #endif
54 
55 namespace {
56 
57 class PatchInfoLinker {
58   std::size_t NumKernels;
59   cm_fc_kernel_t *Kernels;
60 
61   const char *Options;
62 
63   std::string Linked;
64 
65   unsigned Policy;
66   unsigned r127Token;
67   bool hasR127Token;
68 
69   unsigned Platform;
70 
71 public:
PatchInfoLinker(std::size_t NK,cm_fc_kernel_t * K,const char * O=nullptr)72   PatchInfoLinker(std::size_t NK, cm_fc_kernel_t *K, const char *O = nullptr)
73     : NumKernels(NK), Kernels(K), Options(O),
74       Policy(cm::patch::DepGraph::SWSB_POLICY_1),
75       r127Token(-1),
76       hasR127Token(false),
77       Platform(0) {
78     parseOptions();
79   }
80 
81   bool link(cm::patch::Collection &C);
82 
83 protected:
84   unsigned align(unsigned);
85   unsigned writeNOP(unsigned);
86   unsigned writeEOT();
87 
parseOptions()88   void parseOptions() {
89     std::string Opt;
90     if (Options)
91       Opt = Options;
92 
93     if (Opt.empty())
94       return;
95 
96     std::string::size_type pos = 0;
97     do {
98       pos = Opt.find_first_not_of(':', pos);
99       if (pos == std::string::npos)
100         break;
101       switch (Opt[pos]) {
102       default:
103         break;
104       case 'p':
105         ++pos;
106         if (pos >= Opt.size())
107           break;
108         switch (Opt[pos]) {
109         default:
110           break;
111         case '0':
112           Policy = cm::patch::DepGraph::SWSB_POLICY_0;
113           ++pos;
114           break;
115         case '1':
116           Policy = cm::patch::DepGraph::SWSB_POLICY_1;
117           ++pos;
118           break;
119         case '2':
120           Policy = cm::patch::DepGraph::SWSB_POLICY_2;
121           ++pos;
122           break;
123         }
124       }
125       if (pos >= Opt.size())
126         break;
127       pos = Opt.find_first_of(':', pos);
128       if (pos == std::string::npos)
129         break;
130       ++pos;
131     } while (pos < Opt.size());
132   }
133 
134   unsigned writeSync(unsigned RdMask, unsigned WrMask);
135 };
136 
137 } // End anonymous namespace
138 
linkPatchInfo(cm::patch::Collection & C,std::size_t NumKernels,cm_fc_kernel_t * Kernels,const char * Options)139 bool linkPatchInfo(cm::patch::Collection &C,
140                    std::size_t NumKernels, cm_fc_kernel_t *Kernels,
141                    const char *Options) {
142   PatchInfoLinker LD(NumKernels, Kernels, Options);
143   return LD.link(C);
144 }
145 
link(cm::patch::Collection & C)146 bool PatchInfoLinker::link(cm::patch::Collection &C) {
147   for (unsigned i = 0, e = unsigned(NumKernels); i != e; ++i)
148     if (readPatchInfo(Kernels[i].patch_buf, Kernels[i].patch_size, C))
149       return true;
150 
151   Platform = C.getPlatform();
152 
153   std::map<cm::patch::Binary *, cm::patch::Symbol *> BinMap;
154   // Setup mapping from binary to symbol.
155   for (auto I = C.sym_begin(), E = C.sym_end(); I != E; ++I) {
156     // Bail out if there's unresolved symbol.
157     if (I->isUnresolved())
158       return true;
159     if (I->getAddr() == 0) {
160       BinMap[I->getBinary()] = &*I;
161       I->getBinary()->setName(&*I);
162     }
163   }
164 
165   // Associate separate binaries and find the last top-level kernel.
166   cm::patch::Binary *LastTopBin = nullptr;
167   unsigned n = 0;
168   for (auto I = C.bin_begin(), E = C.bin_end(); I != E; ++I) {
169     auto &B = *I;
170     if (n >= NumKernels)
171       return true;
172     B.setData(Kernels[n].binary_buf);
173     B.setSize(Kernels[n].binary_size);
174     B.clearSyncPoints();
175     ++n;
176     // Check link type through its symbol.
177     auto M = BinMap.find(&B);
178     if (M == BinMap.end()) // Bail out if there's binary without symbol name.
179       return true;
180     auto S = M->second;
181     B.setLinkType(S->getExtra() & 0x3);
182     if (B.getLinkType() != CM_FC_LINK_TYPE_CALLEE)
183     {
184         LastTopBin = &B;
185         for (auto RI = LastTopBin->finireg_begin(),
186             RE = LastTopBin->finireg_end(); RI != RE; ++RI) {
187             // Skip use-only access without token associated.
188             if (RI->isDefNotByToken())
189                 continue;
190             unsigned Reg = RI->getRegNo();
191             if (Reg == 127)
192             {
193                 std::tie(hasR127Token, r127Token) = RI->getToken();
194             }
195         }
196     }
197   }
198   // Bail out if there is mismatch.
199   if (n != NumKernels)
200     return true;
201 
202   cm::patch::DepGraph DG(C, Policy);
203   DG.build();
204   DG.resolve();
205 
206   // Link all kernels into 'linked' buffer.
207   Linked.clear();
208   for (auto I = C.bin_begin(), E = C.bin_end(); I != E; ++I) {
209     auto Bin = &*I;
210     align(4); // Align to 16B, i.e. 1 << 4.
211     // Real binary starts from here.
212     Bin->setPos(unsigned(Linked.size()));
213     Bin->sortSyncPoints();
214     unsigned Start = 0;
215     unsigned Inserted = 0;
216     for (auto SI = Bin->sp_begin(), SE = Bin->sp_end(); SI != SE; ++SI) {
217       auto Node = *SI;
218       unsigned Offset = Node->getOffset();
219       assert(Start <= Offset && "Invalid insert point!");
220       if (Start < Offset) {
221         Linked.append(Bin->getData() + Start, Offset - Start);
222         // Adjust relocation in this range.
223         for (auto RI = Bin->rel_begin(), RE = Bin->rel_end(); RI != RE; ++RI) {
224           unsigned RelOff = RI->getOffset();
225           if (Start <= RelOff && RelOff < Offset)
226             RI->setOffset(RelOff + Inserted);
227         }
228       }
229       Start = Offset;
230       if (Platform < cm::patch::PP_PVC)
231       {
232       Inserted += writeSync(Node->getRdTokenMask(), Node->getWrTokenMask());
233       }
234     }
235     Linked.append(Bin->getData() + Start, Bin->getSize() - Start);
236     for (auto RI = Bin->rel_begin(), RE = Bin->rel_end(); RI != RE; ++RI) {
237       unsigned RelOff = RI->getOffset();
238       if (Start <= RelOff && RelOff < Bin->getSize())
239         RI->setOffset(RelOff + Inserted);
240     }
241     if (Bin == LastTopBin)
242       writeEOT();
243   }
244   writeNOP(64);
245 
246   // Fix relocations.
247   for (auto I = C.bin_begin(), E = C.bin_end(); I != E; ++I) {
248     auto Bin = &*I;
249     unsigned Offset = Bin->getPos();
250     // Bail out if this binary is not in the position map.
251     if (Offset == unsigned(-1))
252       return true;
253     for (auto RI = Bin->rel_begin(), RE = Bin->rel_end(); RI != RE; ++RI) {
254       auto S = RI->getSymbol();
255       auto Target = S->getBinary();
256       unsigned TargetOffset = Target->getPos();
257       // Bail out if this binary is not in the position map.
258       if (TargetOffset == unsigned(-1))
259         return true;
260       unsigned AbsIP = Offset + RI->getOffset();
261       unsigned AbsJIP = TargetOffset + S->getAddr();
262       int Imm = AbsJIP - AbsIP;
263       uint32_t *p = reinterpret_cast<uint32_t *>(&Linked[AbsIP]);
264       p[3] = Imm;
265       ++n;
266     }
267   }
268 
269   if (Policy == cm::patch::DepGraph::SWSB_POLICY_2) {
270     std::string Out;
271     int Ret;
272     std::tie(Ret, Out) =
273       cm::toolchain::resolvDep(Platform, Linked.data(), Linked.size());
274     if (Ret < 0)
275       return true;
276     Linked.swap(Out);
277   }
278 
279   C.setLinkedBinary(std::move(Linked));
280 
281   return false;
282 }
283 
align(unsigned Align)284 unsigned PatchInfoLinker::align(unsigned Align) {
285   unsigned A = (1U << Align);
286   unsigned Origin = unsigned(Linked.size());
287   unsigned Padding = ((Origin + A - 1) / A) * A;
288   return writeNOP(Padding - Origin);
289 }
290 
291 /// Append Linked with 'nop' taking up to N bytes.
writeNOP(unsigned N)292 unsigned PatchInfoLinker::writeNOP(unsigned N) {
293   // Bail out if N is not aligned with 8B, i.e. 64 bits. That's the minimal
294   // size of 'nop' instruction.
295   if (N % 8 != 0)
296     return 0;
297   uint64_t regular_nop = 0;
298   uint64_t compact_nop = 0;
299   switch (Platform) {
300   case cm::patch::PP_TGL:
301   case cm::patch::PP_DG2:
302   case cm::patch::PP_PVC:
303   case cm::patch::PP_ELF:
304     regular_nop = 0x00000060U;
305     compact_nop = 0x20000060U;
306     break;
307   default:
308     regular_nop = 0x0000007eU;
309     compact_nop = 0x2000007eU;
310     break;
311   }
312   unsigned B = 0;
313   while (N > 8) {
314     Linked.append(reinterpret_cast<char *>(&regular_nop), sizeof(regular_nop));
315     Linked.append(sizeof(uint64_t), 0);
316     N -= 16;
317     B += 16;
318   }
319   while (N > 0) {
320     Linked.append(reinterpret_cast<char *>(&compact_nop), sizeof(compact_nop));
321     N -= 8;
322     B += 8;
323   }
324   return B;
325 }
326 
writeEOT()327 unsigned PatchInfoLinker::writeEOT() {
328   uint64_t mov0 = 0;
329   uint64_t mov1 = 0;
330   uint64_t snd0 = 0;
331   uint64_t snd1 = 0;
332   uint64_t r127_sync0 = 0;
333   uint64_t r127_sync1 = 0;
334   uint64_t r127_1_sync0 = 0;
335   uint64_t r127_1_sync1 = 0;
336 
337   switch (Platform) {
338   case cm::patch::PP_TGL:
339   {
340       if (hasR127Token)
341       {
342           uint8_t  *sync0Ptr = (uint8_t *)&r127_sync0;
343           r127_sync0 = 0x0001000000002001ULL;
344           r127_sync1 = 0x0000000000000000ULL;
345           sync0Ptr[1] |= (uint8_t)r127Token;
346       }
347       mov0 = 0x7f050aa080030161ULL;
348       mov1 = 0x0000000000460005ULL;
349       snd0 = 0x0000000400000131ULL;
350       snd1 = 0x0000000070207f0cULL;
351   }
352     break;
353     case cm::patch::PP_DG2:
354   {
355       if (hasR127Token)
356       {
357           uint8_t  *sync0Ptr = (uint8_t *)&r127_sync0;
358           r127_sync0 = 0x0001000000002001ULL;
359           r127_sync1 = 0x0000000000000000ULL;
360           sync0Ptr[1] |= (uint8_t)r127Token;
361       }
362       mov0 = 0x7f050aa080030961ULL;
363       mov1 = 0x0000000000100004ULL;
364       snd0 = 0x0000000480000931ULL;
365       snd1 = 0x0000000030207f0cULL;
366   }
367     break;
368   case cm::patch::PP_PVC:
369   {
370       if (hasR127Token)
371       {
372           r127_sync0 = 0x0001000000008001ULL;
373           r127_sync1 = 0x0000000000000000ULL;
374           r127_1_sync0 = 0x000100000000A001ULL;
375           r127_1_sync1 = 0x0000000000000000ULL;
376           uint8_t* sync0Ptr = (uint8_t*)& r127_sync0;
377           sync0Ptr[1] |= (uint8_t)r127Token;
378           sync0Ptr = (uint8_t*)& r127_1_sync0;
379           sync0Ptr[1] |= (uint8_t)r127Token;
380       }
381       mov0 = 0x7f050aa0800c0961ULL;
382       mov1 = 0x0000000000100004ULL;
383       snd0 = 0x00000004800c0931ULL;
384       snd1 = 0x0000000030207f0cULL;
385   }
386     break;
387   case cm::patch::PP_ELF:
388   {
389       if (hasR127Token)
390       {
391           r127_sync0 = 0x0001000000008001ULL;
392           r127_sync1 = 0x0000000000000000ULL;
393           r127_1_sync0 = 0x000100000000A001ULL;
394           r127_1_sync1 = 0x0000000000000000ULL;
395           uint8_t* sync0Ptr = (uint8_t*)&r127_sync0;
396           sync0Ptr[1] |= (uint8_t)r127Token;
397           sync0Ptr = (uint8_t*)&r127_1_sync0;
398           sync0Ptr[1] |= (uint8_t)r127Token;
399       }
400 
401       mov0 = 0x7f050aa0800c0961ULL;
402       mov1 = 0x0000000000100004ULL;
403       snd0 = 0x00000004800c0931ULL;
404       snd1 = 0x0000000030207f0cULL;
405   }
406     break;
407   default:
408     mov0 = 0x2fe0020c00600001ULL;
409     mov1 = 0x00000000008d0000ULL;
410     snd0 = 0x2000020007000031ULL;
411     snd1 = 0x8200001006000fe0ULL;
412     break;
413   }
414   unsigned B = 0;
415   if (hasR127Token)
416   {
417       Linked.append(reinterpret_cast<char *>(&r127_sync0), sizeof(r127_sync0));
418       B += sizeof(r127_sync0);
419       Linked.append(reinterpret_cast<char *>(&r127_sync1), sizeof(r127_sync1));
420       B += sizeof(r127_sync1);
421       if (Platform == cm::patch::PP_PVC)
422       {
423           Linked.append(reinterpret_cast<char*>(&r127_1_sync0), sizeof(r127_1_sync0));
424           B += sizeof(r127_1_sync0);
425           Linked.append(reinterpret_cast<char*>(&r127_1_sync1), sizeof(r127_1_sync1));
426           B += sizeof(r127_1_sync1);
427       }
428   }
429   Linked.append(reinterpret_cast<char *>(&mov0), sizeof(mov0));
430   B += sizeof(mov0);
431   Linked.append(reinterpret_cast<char *>(&mov1), sizeof(mov1));
432   B += sizeof(mov1);
433   Linked.append(reinterpret_cast<char *>(&snd0), sizeof(snd0));
434   B += sizeof(snd0);
435   Linked.append(reinterpret_cast<char *>(&snd1), sizeof(snd1));
436   B += sizeof(snd1);
437 
438   return B;
439 }
440 
441 
writeSync(unsigned RdMask,unsigned WrMask)442 unsigned PatchInfoLinker::writeSync(unsigned RdMask, unsigned WrMask) {
443   uint64_t sysrd0 = 0x0001000000000101;
444   uint64_t sysrd1 = 0x0000000020000000;
445   uint64_t syswr0 = 0x0001000000000001;
446   uint64_t syswr1 = 0x0000000030000000;
447 
448   // Force 1 distance to sync in-order instruction.
449   uint64_t swsb_mask = 0x000000000000FF00;
450 
451   // Subfuncs on qword1.
452   uint64_t fc_mask = 0x00000000F0000000;
453   uint64_t   nop   = 0x0000000000000000;
454   uint64_t allrd   = 0x0000000020000000;
455   uint64_t allwr   = 0x0000000030000000;
456 
457   uint64_t dist = 1;
458   unsigned B = 0;
459 
460   if (RdMask == unsigned(-1)) {
461     uint64_t qw0 = (sysrd0 & ~swsb_mask) | (dist << 8);
462     uint64_t qw1 = (sysrd1 & ~fc_mask) | allrd;
463     Linked.append(reinterpret_cast<char *>(&qw0), sizeof(qw0));
464     B += sizeof(qw0);
465     Linked.append(reinterpret_cast<char *>(&qw1), sizeof(qw1));
466     B += sizeof(qw1);
467     // Clear distance.
468     dist = 0;
469   } else {
470     for (unsigned Tok = 0; RdMask != 0; RdMask >>= 1, ++Tok) {
471       if (RdMask & 1) {
472         uint64_t swsb = 0x30 | Tok;
473         uint64_t qw0 = (sysrd0 & ~swsb_mask) | (swsb << 8);
474         uint64_t qw1 = (sysrd1 & ~fc_mask) | nop;
475         Linked.append(reinterpret_cast<char *>(&qw0), sizeof(qw0));
476         B += sizeof(qw0);
477         Linked.append(reinterpret_cast<char *>(&qw1), sizeof(qw1));
478         B += sizeof(qw1);
479       }
480     }
481   }
482   if (WrMask == unsigned(-1)) {
483     uint64_t qw0 = (sysrd0 & ~swsb_mask) | (dist << 8);
484     uint64_t qw1 = (sysrd1 & ~fc_mask) | allwr;
485     Linked.append(reinterpret_cast<char *>(&qw0), sizeof(qw0));
486     B += sizeof(qw0);
487     Linked.append(reinterpret_cast<char *>(&qw1), sizeof(qw1));
488     B += sizeof(qw1);
489     // Clear distance.
490     dist = 0;
491   } else {
492     for (unsigned Tok = 0; WrMask != 0; WrMask >>= 1, ++Tok) {
493       if (WrMask & 1) {
494         uint64_t swsb = 0x80 | Tok | (dist << 4);
495         uint64_t qw0 = (sysrd0 & ~swsb_mask) | (swsb << 8);
496         uint64_t qw1 = (sysrd1 & ~fc_mask) | nop;
497         Linked.append(reinterpret_cast<char *>(&qw0), sizeof(qw0));
498         B += sizeof(qw0);
499         Linked.append(reinterpret_cast<char *>(&qw1), sizeof(qw1));
500         B += sizeof(qw1);
501         // Clear distance.
502         dist = 0;
503       }
504     }
505   }
506 
507   return B;
508 }
509