xref: /aosp_15_r20/external/bcc/src/cc/frontends/clang/b_frontend_action.cc (revision 387f9dfdfa2baef462e92476d413c7bc2470293e)
1 /*
2  * Copyright (c) 2015 PLUMgrid, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include <linux/bpf.h>
17 #include <linux/version.h>
18 #include <sys/utsname.h>
19 #include <unistd.h>
20 #include <stdlib.h>
21 
22 #include <clang/AST/ASTConsumer.h>
23 #include <clang/AST/ASTContext.h>
24 #include <clang/AST/RecordLayout.h>
25 #include <clang/Frontend/CompilerInstance.h>
26 #include <clang/Frontend/MultiplexConsumer.h>
27 #include <clang/Rewrite/Core/Rewriter.h>
28 #include <clang/Lex/Lexer.h>
29 
30 #include "frontend_action_common.h"
31 #include "b_frontend_action.h"
32 #include "bpf_module.h"
33 #include "common.h"
34 #include "loader.h"
35 #include "table_storage.h"
36 #include "arch_helper.h"
37 #include "bcc_libbpf_inc.h"
38 
39 #include "libbpf.h"
40 #include "bcc_syms.h"
41 
42 namespace ebpf {
43 
constexpr int MAX_CALLING_CONV_REGS = 6;

// Register-name tables, one per architecture.  Entry i is the register
// field name used for the (i+1)-th in-register argument.  The "syscall"
// variants are selected when get_call_conv_cb() is asked for the syscall
// convention.

// x86
const char *calling_conv_regs_x86[] = {"di", "si", "dx", "cx", "r8", "r9"};
const char *calling_conv_syscall_regs_x86[] = {"di",  "si", "dx",
                                               "r10", "r8", "r9"};

// powerpc (big and little endian)
const char *calling_conv_regs_ppc[] = {"gpr[3]", "gpr[4]", "gpr[5]",
                                       "gpr[6]", "gpr[7]", "gpr[8]"};

// s390x -- note that these two tables hold five entries, not six.
const char *calling_conv_regs_s390x[] = {"gprs[2]", "gprs[3]", "gprs[4]",
                                         "gprs[5]", "gprs[6]"};
const char *calling_conv_syscall_regs_s390x[] = {"orig_gpr2", "gprs[3]",
                                                 "gprs[4]", "gprs[5]",
                                                 "gprs[6]"};

// arm64
const char *calling_conv_regs_arm64[] = {"regs[0]", "regs[1]", "regs[2]",
                                         "regs[3]", "regs[4]", "regs[5]"};
const char *calling_conv_syscall_regs_arm64[] = {"orig_x0", "regs[1]",
                                                 "regs[2]", "regs[3]",
                                                 "regs[4]", "regs[5]"};

// mips
const char *calling_conv_regs_mips[] = {"regs[4]", "regs[5]", "regs[6]",
                                        "regs[7]", "regs[8]", "regs[9]"};

// riscv64
const char *calling_conv_regs_riscv64[] = {"a0", "a1", "a2",
                                           "a3", "a4", "a5"};

// loongarch
const char *calling_conv_regs_loongarch[] = {"regs[4]", "regs[5]", "regs[6]",
                                             "regs[7]", "regs[8]", "regs[9]"};
72 
73 
get_call_conv_cb(bcc_arch_t arch,bool for_syscall)74 void *get_call_conv_cb(bcc_arch_t arch, bool for_syscall)
75 {
76   const char **ret;
77 
78   switch(arch) {
79     case BCC_ARCH_PPC:
80     case BCC_ARCH_PPC_LE:
81       ret = calling_conv_regs_ppc;
82       break;
83     case BCC_ARCH_S390X:
84       ret = calling_conv_regs_s390x;
85       if (for_syscall)
86         ret = calling_conv_syscall_regs_s390x;
87       break;
88     case BCC_ARCH_ARM64:
89       ret = calling_conv_regs_arm64;
90       if (for_syscall)
91         ret = calling_conv_syscall_regs_arm64;
92       break;
93     case BCC_ARCH_MIPS:
94       ret = calling_conv_regs_mips;
95       break;
96     case BCC_ARCH_RISCV64:
97       ret = calling_conv_regs_riscv64;
98       break;
99     case BCC_ARCH_LOONGARCH:
100       ret = calling_conv_regs_loongarch;
101       break;
102     default:
103       if (for_syscall)
104         ret = calling_conv_syscall_regs_x86;
105       else
106         ret = calling_conv_regs_x86;
107   }
108 
109   return (void *)ret;
110 }
111 
get_call_conv(bool for_syscall=false)112 const char **get_call_conv(bool for_syscall = false) {
113   const char **ret;
114 
115   ret = (const char **)run_arch_callback(get_call_conv_cb, for_syscall);
116   return ret;
117 }
118 
pt_regs_syscall_regs(void)119 const char *pt_regs_syscall_regs(void) {
120   const char **calling_conv_regs;
121   // Equivalent of PT_REGS_SYSCALL_REGS(ctx) ((struct pt_regs *)PT_REGS_PARM1(ctx))
122   calling_conv_regs = (const char **)run_arch_callback(get_call_conv_cb, false);
123   return calling_conv_regs[0];
124 }
125 
126 /* Use resolver only once per translation */
127 static void *kresolver = NULL;
get_symbol_resolver(void)128 static void *get_symbol_resolver(void) {
129   if (!kresolver)
130     kresolver = bcc_symcache_new(-1, nullptr);
131   return kresolver;
132 }
133 
check_bpf_probe_read_kernel(void)134 static std::string check_bpf_probe_read_kernel(void) {
135   bool is_probe_read_kernel;
136   void *resolver = get_symbol_resolver();
137   uint64_t addr = 0;
138   is_probe_read_kernel = bcc_symcache_resolve_name(resolver, nullptr,
139                           "bpf_probe_read_kernel", &addr) >= 0 ? true: false;
140 
141   /* If bpf_probe_read is not found (ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE) is
142    * not set in newer kernel, then bcc would anyway fail */
143   if (is_probe_read_kernel)
144     return "bpf_probe_read_kernel";
145   else
146     return "bpf_probe_read";
147 }
148 
check_bpf_probe_read_user(llvm::StringRef probe,bool & overlap_addr)149 static std::string check_bpf_probe_read_user(llvm::StringRef probe,
150         bool& overlap_addr) {
151   if (probe.str() == "bpf_probe_read_user" ||
152       probe.str() == "bpf_probe_read_user_str") {
153     // Check for probe_user symbols in backported kernel before fallback
154     void *resolver = get_symbol_resolver();
155     uint64_t addr = 0;
156     bool found = bcc_symcache_resolve_name(resolver, nullptr,
157                   "bpf_probe_read_user", &addr) >= 0 ? true: false;
158     if (found)
159       return probe.str();
160 
161     /* For arch with overlapping address space, dont use bpf_probe_read for
162      * user read. Just error out */
163 #if defined(__s390x__)
164     overlap_addr = true;
165     return "";
166 #endif
167 
168     if (probe.str() == "bpf_probe_read_user")
169       return "bpf_probe_read";
170     else
171       return "bpf_probe_read_str";
172   }
173   return "";
174 }
175 
176 using std::map;
177 using std::move;
178 using std::set;
179 using std::tuple;
180 using std::make_tuple;
181 using std::string;
182 using std::to_string;
183 using std::unique_ptr;
184 using std::vector;
185 using namespace clang;
186 
187 class ProbeChecker : public RecursiveASTVisitor<ProbeChecker> {
188  public:
ProbeChecker(Expr * arg,const set<tuple<Decl *,int>> & ptregs,bool track_helpers,bool is_assign)189   explicit ProbeChecker(Expr *arg, const set<tuple<Decl *, int>> &ptregs,
190                         bool track_helpers, bool is_assign)
191       : needs_probe_(false), is_transitive_(false), ptregs_(ptregs),
192         track_helpers_(track_helpers), nb_derefs_(0), is_assign_(is_assign) {
193     if (arg) {
194       TraverseStmt(arg);
195       if (arg->getType()->isPointerType())
196         is_transitive_ = needs_probe_;
197     }
198   }
ProbeChecker(Expr * arg,const set<tuple<Decl *,int>> & ptregs,bool is_transitive)199   explicit ProbeChecker(Expr *arg, const set<tuple<Decl *, int>> &ptregs,
200                         bool is_transitive)
201       : ProbeChecker(arg, ptregs, is_transitive, false) {}
VisitCallExpr(CallExpr * E)202   bool VisitCallExpr(CallExpr *E) {
203     needs_probe_ = false;
204 
205     if (is_assign_) {
206       // We're looking for a function that returns an external pointer,
207       // regardless of the number of dereferences.
208       for(auto p : ptregs_) {
209         if (std::get<0>(p) == E->getDirectCallee()) {
210           needs_probe_ = true;
211           // ptregs_ stores the number of dereferences needed to get the external
212           // pointer, while nb_derefs_ stores the number of dereferences
213           // encountered.  So, any dereference encountered is one less
214           // dereference needed to get the external pointer.
215           nb_derefs_ -= std::get<1>(p);
216           return false;
217         }
218       }
219     } else {
220       tuple<Decl *, int> pt = make_tuple(E->getDirectCallee(), nb_derefs_);
221       if (ptregs_.find(pt) != ptregs_.end())
222         needs_probe_ = true;
223     }
224 
225     if (!track_helpers_)
226       return false;
227     if (VarDecl *V = dyn_cast_or_null<VarDecl>(E->getCalleeDecl()))
228       needs_probe_ = V->getName() == "bpf_get_current_task";
229     return false;
230   }
VisitMemberExpr(MemberExpr * M)231   bool VisitMemberExpr(MemberExpr *M) {
232     tuple<Decl *, int> pt = make_tuple(M->getMemberDecl(), nb_derefs_);
233     if (ptregs_.find(pt) != ptregs_.end()) {
234       needs_probe_ = true;
235       return false;
236     }
237     if (M->isArrow()) {
238       /* In A->b, if A is an external pointer, then A->b should be considered
239        * one too.  However, if we're taking the address of A->b
240        * (nb_derefs_ < 0), we should take it into account for the number of
241        * indirections; &A->b is a pointer to A with an offset. */
242       if (nb_derefs_ >= 0) {
243         ProbeChecker checker = ProbeChecker(M->getBase(), ptregs_,
244                                             track_helpers_, is_assign_);
245         if (checker.needs_probe() && checker.get_nb_derefs() == 0) {
246           needs_probe_ = true;
247           return false;
248         }
249       }
250       nb_derefs_++;
251     }
252     return true;
253   }
VisitUnaryOperator(UnaryOperator * E)254   bool VisitUnaryOperator(UnaryOperator *E) {
255     if (E->getOpcode() == UO_Deref) {
256       /* In *A, if A is an external pointer, then *A should be considered one
257        * too. */
258       ProbeChecker checker = ProbeChecker(E->getSubExpr(), ptregs_,
259                                           track_helpers_, is_assign_);
260       if (checker.needs_probe() && checker.get_nb_derefs() == 0) {
261         needs_probe_ = true;
262         return false;
263       }
264       nb_derefs_++;
265     } else if (E->getOpcode() == UO_AddrOf) {
266       nb_derefs_--;
267     }
268     return true;
269   }
VisitDeclRefExpr(DeclRefExpr * E)270   bool VisitDeclRefExpr(DeclRefExpr *E) {
271     if (is_assign_) {
272       // We're looking for an external pointer, regardless of the number of
273       // dereferences.
274       for(auto p : ptregs_) {
275         if (std::get<0>(p) == E->getDecl()) {
276           needs_probe_ = true;
277           // ptregs_ stores the number of dereferences needed to get the external
278           // pointer, while nb_derefs_ stores the number of dereferences
279           // encountered.  So, any dereference encountered is one less
280           // dereference needed to get the external pointer.
281           nb_derefs_ -= std::get<1>(p);
282           return false;
283         }
284       }
285     } else {
286       tuple<Decl *, int> pt = make_tuple(E->getDecl(), nb_derefs_);
287       if (ptregs_.find(pt) != ptregs_.end())
288         needs_probe_ = true;
289     }
290     return true;
291   }
needs_probe() const292   bool needs_probe() const { return needs_probe_; }
is_transitive() const293   bool is_transitive() const { return is_transitive_; }
get_nb_derefs() const294   int get_nb_derefs() const { return nb_derefs_; }
295  private:
296   bool needs_probe_;
297   bool is_transitive_;
298   const set<tuple<Decl *, int>> &ptregs_;
299   bool track_helpers_;
300   // Nb of dereferences we go through before finding the external pointer.
301   // A negative number counts the number of addrof.
302   int nb_derefs_;
303   bool is_assign_;
304 };
305 
306 // Visit a piece of the AST and mark it as needing probe reads
307 class ProbeSetter : public RecursiveASTVisitor<ProbeSetter> {
308  public:
ProbeSetter(set<tuple<Decl *,int>> * ptregs,int nb_derefs)309   explicit ProbeSetter(set<tuple<Decl *, int>> *ptregs, int nb_derefs)
310       : ptregs_(ptregs), nb_derefs_(nb_derefs) {}
VisitDeclRefExpr(DeclRefExpr * E)311   bool VisitDeclRefExpr(DeclRefExpr *E) {
312     tuple<Decl *, int> pt = make_tuple(E->getDecl(), nb_derefs_);
313     ptregs_->insert(pt);
314     return true;
315   }
ProbeSetter(set<tuple<Decl *,int>> * ptregs)316   explicit ProbeSetter(set<tuple<Decl *, int>> *ptregs)
317       : ProbeSetter(ptregs, 0) {}
VisitUnaryOperator(UnaryOperator * E)318   bool VisitUnaryOperator(UnaryOperator *E) {
319     if (E->getOpcode() == UO_Deref)
320       nb_derefs_++;
321     return true;
322   }
VisitMemberExpr(MemberExpr * M)323   bool VisitMemberExpr(MemberExpr *M) {
324     tuple<Decl *, int> pt = make_tuple(M->getMemberDecl(), nb_derefs_);
325     ptregs_->insert(pt);
326     return false;
327   }
328  private:
329   set<tuple<Decl *, int>> *ptregs_;
330   // Nb of dereferences we go through before getting to the actual variable.
331   int nb_derefs_;
332 };
333 
MapVisitor(set<Decl * > & m)334 MapVisitor::MapVisitor(set<Decl *> &m) : m_(m) {}
335 
VisitCallExpr(CallExpr * Call)336 bool MapVisitor::VisitCallExpr(CallExpr *Call) {
337   if (MemberExpr *Memb = dyn_cast<MemberExpr>(Call->getCallee()->IgnoreImplicit())) {
338     StringRef memb_name = Memb->getMemberDecl()->getName();
339     if (DeclRefExpr *Ref = dyn_cast<DeclRefExpr>(Memb->getBase())) {
340       if (SectionAttr *A = Ref->getDecl()->getAttr<SectionAttr>()) {
341         if (!A->getName().startswith("maps"))
342           return true;
343 
344         if (memb_name == "update" || memb_name == "insert") {
345           ProbeChecker checker = ProbeChecker(Call->getArg(1), ptregs_, true,
346                                               true);
347           if (checker.needs_probe())
348             m_.insert(Ref->getDecl());
349         }
350       }
351     }
352   }
353   return true;
354 }
355 
ProbeVisitor(ASTContext & C,Rewriter & rewriter,set<Decl * > & m,bool track_helpers)356 ProbeVisitor::ProbeVisitor(ASTContext &C, Rewriter &rewriter,
357                            set<Decl *> &m, bool track_helpers) :
358   C(C), rewriter_(rewriter), m_(m), ctx_(nullptr), track_helpers_(track_helpers),
359   addrof_stmt_(nullptr), is_addrof_(false) {
360   const char **calling_conv_regs = get_call_conv();
361   cannot_fall_back_safely = (calling_conv_regs == calling_conv_regs_s390x || calling_conv_regs == calling_conv_regs_riscv64);
362 }
363 
assignsExtPtr(Expr * E,int * nbDerefs)364 bool ProbeVisitor::assignsExtPtr(Expr *E, int *nbDerefs) {
365   if (IsContextMemberExpr(E)) {
366     *nbDerefs = 0;
367     return true;
368   }
369 
370   /* If the expression contains a call to another function, we need to visit
371   * that function first to know if a rewrite is necessary (i.e., if the
372   * function returns an external pointer). */
373   if (!TraverseStmt(E))
374     return false;
375 
376   ProbeChecker checker = ProbeChecker(E, ptregs_, track_helpers_,
377                                       true);
378   if (checker.is_transitive()) {
379     // The negative of the number of dereferences is the number of addrof.  In
380     // an assignment, if we went through n addrof before getting the external
381     // pointer, then we'll need n dereferences on the left-hand side variable
382     // to get to the external pointer.
383     *nbDerefs = -checker.get_nb_derefs();
384     return true;
385   }
386 
387   if (E->IgnoreParenCasts()->getStmtClass() == Stmt::CallExprClass) {
388     CallExpr *Call = dyn_cast<CallExpr>(E->IgnoreParenCasts());
389     if (MemberExpr *Memb = dyn_cast<MemberExpr>(Call->getCallee()->IgnoreImplicit())) {
390       StringRef memb_name = Memb->getMemberDecl()->getName();
391       if (DeclRefExpr *Ref = dyn_cast<DeclRefExpr>(Memb->getBase())) {
392         if (SectionAttr *A = Ref->getDecl()->getAttr<SectionAttr>()) {
393           if (!A->getName().startswith("maps"))
394             return false;
395 
396           if (memb_name == "lookup" || memb_name == "lookup_or_init" ||
397               memb_name == "lookup_or_try_init") {
398             if (m_.find(Ref->getDecl()) != m_.end()) {
399               // Retrieved an ext. pointer from a map, mark LHS as ext. pointer.
400               // Pointers from maps always need a single dereference to get the
401               // actual value.  The value may be an external pointer but cannot
402               // be a pointer to an external pointer as the verifier prohibits
403               // storing known pointers (to map values, context, the stack, or
404               // the packet) in maps.
405               *nbDerefs = 1;
406               return true;
407             }
408           }
409         }
410       }
411     }
412   }
413   return false;
414 }
VisitVarDecl(VarDecl * D)415 bool ProbeVisitor::VisitVarDecl(VarDecl *D) {
416   if (Expr *E = D->getInit()) {
417     int nbDerefs;
418     if (assignsExtPtr(E, &nbDerefs)) {
419       // The negative of the number of addrof is the number of dereferences.
420       tuple<Decl *, int> pt = make_tuple(D, nbDerefs);
421       set_ptreg(pt);
422     }
423   }
424   return true;
425 }
426 
TraverseStmt(Stmt * S)427 bool ProbeVisitor::TraverseStmt(Stmt *S) {
428   if (whitelist_.find(S) != whitelist_.end())
429     return true;
430   auto ret = RecursiveASTVisitor<ProbeVisitor>::TraverseStmt(S);
431   if (addrof_stmt_ == S) {
432     addrof_stmt_ = nullptr;
433     is_addrof_ = false;
434   }
435   return ret;
436 }
437 
VisitCallExpr(CallExpr * Call)438 bool ProbeVisitor::VisitCallExpr(CallExpr *Call) {
439   Decl *decl = Call->getCalleeDecl();
440   if (decl == nullptr)
441       return true;
442 
443   // Skip bpf_probe_read for the third argument if it is an AddrOf.
444   if (VarDecl *V = dyn_cast<VarDecl>(decl)) {
445     if (V->getName() == "bpf_probe_read" && Call->getNumArgs() >= 3) {
446       const Expr *E = Call->getArg(2)->IgnoreParenCasts();
447       whitelist_.insert(E);
448       return true;
449     }
450   }
451 
452   if (FunctionDecl *F = dyn_cast<FunctionDecl>(decl)) {
453     if (F->hasBody()) {
454       unsigned i = 0;
455       for (auto arg : Call->arguments()) {
456         ProbeChecker checker = ProbeChecker(arg, ptregs_, track_helpers_,
457                                             true);
458         if (checker.needs_probe()) {
459           tuple<Decl *, int> pt = make_tuple(F->getParamDecl(i),
460                                              -checker.get_nb_derefs());
461           ptregs_.insert(pt);
462         }
463         ++i;
464       }
465       if (fn_visited_.find(F) == fn_visited_.end()) {
466         fn_visited_.insert(F);
467         /* Maintains a stack of the number of dereferences for the external
468          * pointers returned by each function in the call stack or -1 if the
469          * function didn't return an external pointer. */
470         ptregs_returned_.push_back(-1);
471         TraverseDecl(F);
472         int nb_derefs = ptregs_returned_.back();
473         ptregs_returned_.pop_back();
474         if (nb_derefs != -1) {
475           tuple<Decl *, int> pt = make_tuple(F, nb_derefs);
476           ptregs_.insert(pt);
477         }
478       }
479     }
480   }
481   return true;
482 }
VisitReturnStmt(ReturnStmt * R)483 bool ProbeVisitor::VisitReturnStmt(ReturnStmt *R) {
484   /* If this function wasn't called by another, there's no need to check the
485    * return statement for external pointers. */
486   if (ptregs_returned_.size() == 0)
487     return true;
488 
489   /* Reverse order of traversals.  This is needed if, in the return statement,
490    * we're calling a function that's returning an external pointer: we need to
491    * know what the function is returning to decide what this function is
492    * returning. */
493   if (!TraverseStmt(R->getRetValue()))
494     return false;
495 
496   ProbeChecker checker = ProbeChecker(R->getRetValue(), ptregs_,
497                                       track_helpers_, true);
498   if (checker.needs_probe()) {
499     int curr_nb_derefs = ptregs_returned_.back();
500     int nb_derefs = -checker.get_nb_derefs();
501     /* If the function returns external pointers with different levels of
502      * indirection, we handle the case with the highest level of indirection
503      * and leave it to the user to manually handle other cases. */
504     if (nb_derefs > curr_nb_derefs) {
505       ptregs_returned_.pop_back();
506       ptregs_returned_.push_back(nb_derefs);
507     }
508   }
509   return true;
510 }
VisitBinaryOperator(BinaryOperator * E)511 bool ProbeVisitor::VisitBinaryOperator(BinaryOperator *E) {
512   if (!E->isAssignmentOp())
513     return true;
514 
515   // copy probe attribute from RHS to LHS if present
516   int nbDerefs;
517   if (assignsExtPtr(E->getRHS(), &nbDerefs)) {
518     ProbeSetter setter(&ptregs_, nbDerefs);
519     setter.TraverseStmt(E->getLHS());
520   }
521   return true;
522 }
VisitUnaryOperator(UnaryOperator * E)523 bool ProbeVisitor::VisitUnaryOperator(UnaryOperator *E) {
524   if (E->getOpcode() == UO_AddrOf) {
525     addrof_stmt_ = E;
526     is_addrof_ = true;
527   }
528   if (E->getOpcode() != UO_Deref)
529     return true;
530   if (memb_visited_.find(E) != memb_visited_.end())
531     return true;
532   Expr *sub = E->getSubExpr();
533   if (!ProbeChecker(sub, ptregs_, track_helpers_).needs_probe())
534     return true;
535   memb_visited_.insert(E);
536   string pre, post;
537   pre = "({ typeof(" + E->getType().getAsString() + ") _val; __builtin_memset(&_val, 0, sizeof(_val));";
538   if (cannot_fall_back_safely)
539     pre += " bpf_probe_read_kernel(&_val, sizeof(_val), (void *)";
540   else
541     pre += " bpf_probe_read(&_val, sizeof(_val), (void *)";
542   post = "); _val; })";
543   rewriter_.ReplaceText(expansionLoc(E->getOperatorLoc()), 1, pre);
544   rewriter_.InsertTextAfterToken(expansionLoc(GET_ENDLOC(sub)), post);
545   return true;
546 }
VisitMemberExpr(MemberExpr * E)547 bool ProbeVisitor::VisitMemberExpr(MemberExpr *E) {
548   if (memb_visited_.find(E) != memb_visited_.end()) return true;
549 
550   Expr *base;
551   SourceLocation rhs_start, member;
552   bool found = false;
553   for (MemberExpr *M = E; M; M = dyn_cast<MemberExpr>(M->getBase())) {
554     memb_visited_.insert(M);
555     rhs_start = GET_ENDLOC(M);
556     base = M->getBase();
557     member = M->getMemberLoc();
558     if (M->isArrow()) {
559       found = true;
560       break;
561     }
562   }
563   if (!found)
564     return true;
565   if (member.isInvalid()) {
566     error(GET_ENDLOC(base), "internal error: MemberLoc is invalid while preparing probe rewrite");
567     return false;
568   }
569 
570   if (!rewriter_.isRewritable(GET_BEGINLOC(E)))
571     return true;
572 
573   // parent expr has addrof, skip the rewrite, set is_addrof_ to flase so
574   // it won't affect next level of indirect address
575   if (is_addrof_) {
576     is_addrof_ = false;
577     return true;
578   }
579 
580   /* If the base of the dereference is a call to another function, we need to
581    * visit that function first to know if a rewrite is necessary (i.e., if the
582    * function returns an external pointer). */
583   if (base->IgnoreParenCasts()->getStmtClass() == Stmt::CallExprClass) {
584     CallExpr *Call = dyn_cast<CallExpr>(base->IgnoreParenCasts());
585     if (!TraverseStmt(Call))
586       return false;
587   }
588 
589   // Checks to see if the expression references something that needs to be run
590   // through bpf_probe_read.
591   if (!ProbeChecker(base, ptregs_, track_helpers_).needs_probe())
592     return true;
593 
594   // If the base is an array, we will skip rewriting. See issue #2352.
595   if (E->getType()->isArrayType())
596     return true;
597 
598   string rhs = rewriter_.getRewrittenText(expansionRange(SourceRange(rhs_start, GET_ENDLOC(E))));
599   string base_type = base->getType()->getPointeeType().getAsString();
600   string pre, post;
601   pre = "({ typeof(" + E->getType().getAsString() + ") _val; __builtin_memset(&_val, 0, sizeof(_val));";
602   if (cannot_fall_back_safely)
603     pre += " bpf_probe_read_kernel(&_val, sizeof(_val), (void *)&";
604   else
605     pre += " bpf_probe_read(&_val, sizeof(_val), (void *)&";
606   post = rhs + "); _val; })";
607   rewriter_.InsertText(expansionLoc(GET_BEGINLOC(E)), pre);
608   rewriter_.ReplaceText(expansionRange(SourceRange(member, GET_ENDLOC(E))), post);
609   return true;
610 }
VisitArraySubscriptExpr(ArraySubscriptExpr * E)611 bool ProbeVisitor::VisitArraySubscriptExpr(ArraySubscriptExpr *E) {
612   if (memb_visited_.find(E) != memb_visited_.end()) return true;
613   if (!ProbeChecker(E, ptregs_, track_helpers_).needs_probe())
614     return true;
615 
616   // Parent expr has addrof, skip the rewrite.
617   if (is_addrof_)
618     return true;
619 
620   // If the base is an array, we will skip rewriting. See issue #2352.
621   if (E->getType()->isArrayType())
622     return true;
623 
624   if (!rewriter_.isRewritable(GET_BEGINLOC(E)))
625     return true;
626 
627   Expr *base = E->getBase();
628   Expr *idx = E->getIdx();
629   memb_visited_.insert(E);
630 
631   if (!rewriter_.isRewritable(GET_BEGINLOC(base)))
632     return true;
633   if (!rewriter_.isRewritable(GET_BEGINLOC(idx)))
634     return true;
635 
636 
637   string pre, lbracket, rbracket;
638   LangOptions opts;
639   SourceLocation lbracket_start, lbracket_end;
640   SourceRange lbracket_range;
641 
642   /* For cases like daddr->s6_addr[4], clang encodes the end location of "base"
643    * as "]". This makes it hard to rewrite the expression like
644    * "daddr->s6_addr  [ 4 ]" since we do not know the end location
645    * of "addr->s6_addr". Let us abort the operation if this is the case.
646    */
647   lbracket_start = Lexer::getLocForEndOfToken(GET_ENDLOC(base), 1,
648                                               rewriter_.getSourceMgr(),
649                                               opts).getLocWithOffset(1);
650   lbracket_end = GET_BEGINLOC(idx).getLocWithOffset(-1);
651   lbracket_range = expansionRange(SourceRange(lbracket_start, lbracket_end));
652   if (rewriter_.getRewrittenText(lbracket_range).size() == 0)
653     return true;
654 
655   pre = "({ typeof(" + E->getType().getAsString() + ") _val; __builtin_memset(&_val, 0, sizeof(_val));";
656   if (cannot_fall_back_safely)
657     pre += " bpf_probe_read_kernel(&_val, sizeof(_val), (void *)((";
658   else
659     pre += " bpf_probe_read(&_val, sizeof(_val), (void *)((";
660   if (isMemberDereference(base)) {
661     pre += "&";
662     // If the base of the array subscript is a member dereference, we'll rewrite
663     // both at the same time.
664     addrof_stmt_ = base;
665     is_addrof_ = true;
666   }
667   rewriter_.InsertText(expansionLoc(GET_BEGINLOC(base)), pre);
668 
669   /* Replace left bracket and any space around it.  Since Clang doesn't provide
670    * a method to retrieve the left bracket, replace everything from the end of
671    * the base to the start of the index. */
672   lbracket = ") + (";
673   rewriter_.ReplaceText(lbracket_range, lbracket);
674 
675   rbracket = "))); _val; })";
676   rewriter_.ReplaceText(expansionLoc(E->getRBracketLoc()), 1, rbracket);
677 
678   return true;
679 }
680 
isMemberDereference(Expr * E)681 bool ProbeVisitor::isMemberDereference(Expr *E) {
682   if (E->IgnoreParenCasts()->getStmtClass() != Stmt::MemberExprClass)
683     return false;
684   for (MemberExpr *M = dyn_cast<MemberExpr>(E->IgnoreParenCasts()); M;
685        M = dyn_cast<MemberExpr>(M->getBase()->IgnoreParenCasts())) {
686     if (M->isArrow())
687       return true;
688   }
689   return false;
690 }
IsContextMemberExpr(Expr * E)691 bool ProbeVisitor::IsContextMemberExpr(Expr *E) {
692   if (!E->getType()->isPointerType())
693     return false;
694 
695   Expr *base;
696   SourceLocation member;
697   bool found = false;
698   MemberExpr *M;
699   Expr *Ex = E->IgnoreParenCasts();
700   while (Ex->getStmtClass() == Stmt::ArraySubscriptExprClass
701          || Ex->getStmtClass() == Stmt::MemberExprClass) {
702     if (Ex->getStmtClass() == Stmt::ArraySubscriptExprClass) {
703       Ex = dyn_cast<ArraySubscriptExpr>(Ex)->getBase()->IgnoreParenCasts();
704     } else if (Ex->getStmtClass() == Stmt::MemberExprClass) {
705       M = dyn_cast<MemberExpr>(Ex);
706       base = M->getBase()->IgnoreParenCasts();
707       member = M->getMemberLoc();
708       if (M->isArrow()) {
709         found = true;
710         break;
711       }
712       Ex = base;
713     }
714   }
715   if (!found) {
716     return false;
717   }
718   if (member.isInvalid()) {
719     return false;
720   }
721 
722   if (DeclRefExpr *base_expr = dyn_cast<DeclRefExpr>(base)) {
723     if (base_expr->getDecl() == ctx_) {
724       return true;
725     }
726   }
727   return false;
728 }
729 
730 SourceRange
expansionRange(SourceRange range)731 ProbeVisitor::expansionRange(SourceRange range) {
732 #if LLVM_VERSION_MAJOR >= 7
733   return rewriter_.getSourceMgr().getExpansionRange(range).getAsRange();
734 #else
735   return rewriter_.getSourceMgr().getExpansionRange(range);
736 #endif
737 }
738 
739 SourceLocation
expansionLoc(SourceLocation loc)740 ProbeVisitor::expansionLoc(SourceLocation loc) {
741   return rewriter_.getSourceMgr().getExpansionLoc(loc);
742 }
743 
744 template <unsigned N>
error(SourceLocation loc,const char (& fmt)[N])745 DiagnosticBuilder ProbeVisitor::error(SourceLocation loc, const char (&fmt)[N]) {
746   unsigned int diag_id = C.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, fmt);
747   return C.getDiagnostics().Report(loc, diag_id);
748 }
749 
BTypeVisitor(ASTContext & C,BFrontendAction & fe)750 BTypeVisitor::BTypeVisitor(ASTContext &C, BFrontendAction &fe)
751     : C(C), diag_(C.getDiagnostics()), fe_(fe), rewriter_(fe.rewriter()), out_(llvm::errs()) {
752   const char **calling_conv_regs = get_call_conv();
753   cannot_fall_back_safely = (calling_conv_regs == calling_conv_regs_s390x || calling_conv_regs == calling_conv_regs_riscv64);
754 }
755 
genParamDirectAssign(FunctionDecl * D,string & preamble,const char ** calling_conv_regs)756 void BTypeVisitor::genParamDirectAssign(FunctionDecl *D, string& preamble,
757                                         const char **calling_conv_regs) {
758   for (size_t idx = 1; idx < fn_args_.size(); idx++) {
759     ParmVarDecl *arg = fn_args_[idx];
760 
761     if (arg->isUsed()) {
762       // Move the args into a preamble section where the same params are
763       // declared and initialized from pt_regs.
764       // This init is only performed when requested by the program.
765       string text = rewriter_.getRewrittenText(expansionRange(arg->getSourceRange()));
766       arg->addAttr(UnavailableAttr::CreateImplicit(C, "ptregs"));
767       size_t d = idx - 1;
768       const char *reg = calling_conv_regs[d];
769       preamble += " " + text + " = (" + arg->getType().getAsString() + ")" +
770                   fn_args_[0]->getName().str() + "->" + string(reg) + ";";
771     }
772   }
773 }
774 
genParamIndirectAssign(FunctionDecl * D,string & preamble,const char ** calling_conv_regs)775 void BTypeVisitor::genParamIndirectAssign(FunctionDecl *D, string& preamble,
776                                           const char **calling_conv_regs) {
777   string tmp_preamble;
778   bool hasUsed = false;
779   ParmVarDecl *arg = fn_args_[0];
780   string new_ctx = "__" + arg->getName().str();
781 
782   for (size_t idx = 1; idx < fn_args_.size(); idx++) {
783     arg = fn_args_[idx];
784 
785     if (arg->isUsed()) {
786       // Move the args into a preamble section where the same params are
787       // declared and initialized from pt_regs.
788       // This init is only performed when requested by the program.
789       hasUsed = true;
790       string text = rewriter_.getRewrittenText(expansionRange(arg->getSourceRange()));
791       size_t d = idx - 1;
792       const char *reg = calling_conv_regs[d];
793       tmp_preamble += "\n " + text + ";";
794       if (cannot_fall_back_safely)
795         tmp_preamble += " bpf_probe_read_kernel";
796       else
797         tmp_preamble += " bpf_probe_read";
798       tmp_preamble += "(&" + arg->getName().str() + ", sizeof(" +
799                   arg->getName().str() + "), &" + new_ctx + "->" +
800                   string(reg) + ");";
801     }
802   }
803 
804   arg = fn_args_[0];
805   if ( hasUsed || arg->isUsed()) {
806     preamble += " struct pt_regs * " + new_ctx + " = (void *)" +
807                 arg->getName().str() + "->" +
808                 string(pt_regs_syscall_regs()) + ";";
809   }
810 
811   preamble += tmp_preamble;
812 }
813 
rewriteFuncParam(FunctionDecl * D)814 void BTypeVisitor::rewriteFuncParam(FunctionDecl *D) {
815   string preamble = "{\n";
816   if (D->param_size() > 1) {
817     bool is_syscall = false;
818     if (strncmp(D->getName().str().c_str(), "syscall__", 9) == 0 ||
819         strncmp(D->getName().str().c_str(), "kprobe____x64_sys_", 18) == 0)
820       is_syscall = true;
821     const char **calling_conv_regs = get_call_conv(is_syscall);
822 
823     // If function prefix is "syscall__" or "kprobe____x64_sys_",
824     // the function will attach to a kprobe syscall function.
825     // Guard parameter assiggnment with CONFIG_ARCH_HAS_SYSCALL_WRAPPER.
826     // For __x64_sys_* syscalls, this is always true, but we guard
827     // it in case of "syscall__" for other architectures.
828     if (is_syscall) {
829       preamble += "#if defined(CONFIG_ARCH_HAS_SYSCALL_WRAPPER)\n";
830       genParamIndirectAssign(D, preamble, calling_conv_regs);
831       preamble += "\n#else\n";
832       genParamDirectAssign(D, preamble, calling_conv_regs);
833       preamble += "\n#endif\n";
834     } else {
835       genParamDirectAssign(D, preamble, calling_conv_regs);
836     }
837     rewriter_.ReplaceText(
838         expansionRange(SourceRange(GET_ENDLOC(D->getParamDecl(0)),
839                     GET_ENDLOC(D->getParamDecl(D->getNumParams() - 1)))),
840         fn_args_[0]->getName());
841   }
842   // for each trace argument, convert the variable from ptregs to something on stack
843   if (CompoundStmt *S = dyn_cast<CompoundStmt>(D->getBody()))
844     rewriter_.ReplaceText(S->getLBracLoc(), 1, preamble);
845 }
846 
VisitFunctionDecl(FunctionDecl * D)847 bool BTypeVisitor::VisitFunctionDecl(FunctionDecl *D) {
848   // put each non-static non-inline function decl in its own section, to be
849   // extracted by the MemoryManager
850   auto real_start_loc = rewriter_.getSourceMgr().getFileLoc(GET_BEGINLOC(D));
851   if (fe_.is_rewritable_ext_func(D)) {
852     current_fn_ = string(D->getName());
853     string bd = rewriter_.getRewrittenText(expansionRange(D->getSourceRange()));
854     auto func_info = fe_.prog_func_info_.add_func(current_fn_);
855     if (!func_info) {
856       // We should only reach add_func above once per function seen, but the
857       // BPF_PROG-helper using macros in export/helpers.h (KFUNC_PROBE ..
858       // LSM_PROBE) break this logic. TODO: adjust export/helpers.h to not
859       // do so and bail out here, or find a better place to do add_func
860       func_info = fe_.prog_func_info_.get_func(current_fn_);
861       //error(GET_BEGINLOC(D), "redefinition of existing function");
862       //return false;
863     }
864     func_info->src_ = bd;
865     fe_.func_range_[current_fn_] = expansionRange(D->getSourceRange());
866     if (!D->getAttr<SectionAttr>()) {
867       string attr = string("__attribute__((section(\"") + BPF_FN_PREFIX +
868                     D->getName().str() + "\")))\n";
869       rewriter_.InsertText(real_start_loc, attr);
870     }
871     if (D->param_size() > MAX_CALLING_CONV_REGS + 1) {
872       error(GET_BEGINLOC(D->getParamDecl(MAX_CALLING_CONV_REGS + 1)),
873             "too many arguments, bcc only supports in-register parameters");
874       return false;
875     }
876 
877     fn_args_.clear();
878     for (auto arg_it = D->param_begin(); arg_it != D->param_end(); arg_it++) {
879       auto *arg = *arg_it;
880       if (arg->getName() == "") {
881         error(GET_ENDLOC(arg), "arguments to BPF program definition must be named");
882         return false;
883       }
884       fn_args_.push_back(arg);
885     }
886     rewriteFuncParam(D);
887   } else if (D->hasBody() &&
888              rewriter_.getSourceMgr().getFileID(real_start_loc)
889                == rewriter_.getSourceMgr().getMainFileID()) {
890     // rewritable functions that are static should be always treated as helper
891     rewriter_.InsertText(real_start_loc, "__attribute__((always_inline))\n");
892   }
893   return true;
894 }
895 
896 // Reverse the order of call traversal so that parameters inside of
897 // function calls will get rewritten before the call itself, otherwise
898 // text mangling will result.
TraverseCallExpr(CallExpr * Call)899 bool BTypeVisitor::TraverseCallExpr(CallExpr *Call) {
900   for (auto child : Call->children())
901     if (!TraverseStmt(child))
902       return false;
903   if (!WalkUpFromCallExpr(Call))
904     return false;
905   return true;
906 }
907 
908 // convert calls of the type:
909 //  table.foo(&key)
910 // to:
911 //  bpf_table_foo_elem(bpf_pseudo_fd(table), &key [,&leaf])
VisitCallExpr(CallExpr * Call)912 bool BTypeVisitor::VisitCallExpr(CallExpr *Call) {
913   // make sure node is a reference to a bpf table, which is assured by the
914   // presence of the section("maps/<typename>") GNU __attribute__
915   if (MemberExpr *Memb = dyn_cast<MemberExpr>(Call->getCallee()->IgnoreImplicit())) {
916     StringRef memb_name = Memb->getMemberDecl()->getName();
917     if (DeclRefExpr *Ref = dyn_cast<DeclRefExpr>(Memb->getBase())) {
918       if (SectionAttr *A = Ref->getDecl()->getAttr<SectionAttr>()) {
919         if (!A->getName().startswith("maps"))
920           return true;
921 
922         string args = rewriter_.getRewrittenText(expansionRange(SourceRange(GET_BEGINLOC(Call->getArg(0)),
923                                                    GET_ENDLOC(Call->getArg(Call->getNumArgs() - 1)))));
924 
925         // find the table fd, which was opened at declaration time
926         TableStorage::iterator desc;
927         Path local_path({fe_.id(), string(Ref->getDecl()->getName())});
928         Path global_path({string(Ref->getDecl()->getName())});
929         if (!fe_.table_storage().Find(local_path, desc)) {
930           if (!fe_.table_storage().Find(global_path, desc)) {
931             error(GET_ENDLOC(Ref), "bpf_table %0 failed to open") << Ref->getDecl()->getName();
932             return false;
933           }
934         }
935         string fd = to_string(desc->second.fd >= 0 ? desc->second.fd : desc->second.fake_fd);
936         string prefix, suffix;
937         string txt;
938         auto rewrite_start = GET_BEGINLOC(Call);
939         auto rewrite_end = GET_ENDLOC(Call);
940         if (memb_name == "lookup_or_init" || memb_name == "lookup_or_try_init") {
941           string name = string(Ref->getDecl()->getName());
942           string arg0 = rewriter_.getRewrittenText(expansionRange(Call->getArg(0)->getSourceRange()));
943           string arg1 = rewriter_.getRewrittenText(expansionRange(Call->getArg(1)->getSourceRange()));
944           string lookup = "bpf_map_lookup_elem_(bpf_pseudo_fd(1, " + fd + ")";
945           string update = "bpf_map_update_elem_(bpf_pseudo_fd(1, " + fd + ")";
946           txt  = "({typeof(" + name + ".leaf) *leaf = " + lookup + ", " + arg0 + "); ";
947           txt += "if (!leaf) {";
948           txt += " " + update + ", " + arg0 + ", " + arg1 + ", BPF_NOEXIST);";
949           txt += " leaf = " + lookup + ", " + arg0 + ");";
950           if (memb_name == "lookup_or_init") {
951             txt += " if (!leaf) return 0;";
952           }
953           txt += "}";
954           txt += "leaf;})";
955         } else if (memb_name == "increment" || memb_name == "atomic_increment") {
956           string name = string(Ref->getDecl()->getName());
957           string arg0 = rewriter_.getRewrittenText(expansionRange(Call->getArg(0)->getSourceRange()));
958 
959           string increment_value = "1";
960           if (Call->getNumArgs() == 2) {
961             increment_value = rewriter_.getRewrittenText(expansionRange(Call->getArg(1)->getSourceRange()));
962 
963           }
964 
965           string lookup = "bpf_map_lookup_elem_(bpf_pseudo_fd(1, " + fd + ")";
966           string update = "bpf_map_update_elem_(bpf_pseudo_fd(1, " + fd + ")";
967           txt  = "({ typeof(" + name + ".key) _key = " + arg0 + "; ";
968           txt += "typeof(" + name + ".leaf) *_leaf = " + lookup + ", &_key); ";
969           txt += "if (_leaf) ";
970 
971           if (memb_name == "atomic_increment") {
972             txt += "lock_xadd(_leaf, " + increment_value + ");";
973           } else {
974             txt += "(*_leaf) += " + increment_value + ";";
975           }
976           if (desc->second.type == BPF_MAP_TYPE_HASH) {
977             txt += "else { typeof(" + name + ".leaf) _zleaf; __builtin_memset(&_zleaf, 0, sizeof(_zleaf)); ";
978             txt += "_zleaf += " + increment_value + ";";
979             txt += update + ", &_key, &_zleaf, BPF_NOEXIST); } ";
980           }
981           txt += "})";
982         } else if (memb_name == "perf_submit") {
983           string name = string(Ref->getDecl()->getName());
984           string arg0 = rewriter_.getRewrittenText(expansionRange(Call->getArg(0)->getSourceRange()));
985           string args_other = rewriter_.getRewrittenText(expansionRange(SourceRange(GET_BEGINLOC(Call->getArg(1)),
986                                                            GET_ENDLOC(Call->getArg(2)))));
987           txt = "bpf_perf_event_output(" + arg0 + ", (void *)bpf_pseudo_fd(1, " + fd + ")";
988           txt += ", CUR_CPU_IDENTIFIER, " + args_other + ")";
989 
990           // e.g.
991           // struct data_t { u32 pid; }; data_t data;
992           // events.perf_submit(ctx, &data, sizeof(data));
993           // ...
994           //                       &data   ->     data    ->  typeof(data)        ->   data_t
995           auto type_arg1 = Call->getArg(1)->IgnoreCasts()->getType().getTypePtr()->getPointeeType().getTypePtrOrNull();
996           if (type_arg1 && type_arg1->isStructureType()) {
997             auto event_type = type_arg1->getAsTagDecl();
998             const auto *r = dyn_cast<RecordDecl>(event_type);
999             std::vector<std::string> perf_event;
1000 
1001             for (auto it = r->field_begin(); it != r->field_end(); ++it) {
1002               // After LLVM commit aee49255074f
1003               // (https://github.com/llvm/llvm-project/commit/aee49255074fd4ef38d97e6e70cbfbf2f9fd0fa7)
1004               // array type change from `comm#char [16]` to `comm#char[16]`
1005               perf_event.push_back(it->getNameAsString() + "#" + it->getType().getAsString()); //"pid#u32"
1006             }
1007             fe_.perf_events_[name] = perf_event;
1008           }
1009         } else if (memb_name == "perf_submit_skb") {
1010           string skb = rewriter_.getRewrittenText(expansionRange(Call->getArg(0)->getSourceRange()));
1011           string skb_len = rewriter_.getRewrittenText(expansionRange(Call->getArg(1)->getSourceRange()));
1012           string meta = rewriter_.getRewrittenText(expansionRange(Call->getArg(2)->getSourceRange()));
1013           string meta_len = rewriter_.getRewrittenText(expansionRange(Call->getArg(3)->getSourceRange()));
1014           txt = "bpf_perf_event_output(" +
1015             skb + ", " +
1016             "(void *)bpf_pseudo_fd(1, " + fd + "), " +
1017             "((__u64)" + skb_len + " << 32) | BPF_F_CURRENT_CPU, " +
1018             meta + ", " +
1019             meta_len + ");";
1020         } else if (memb_name == "get_stackid") {
1021           if (desc->second.type == BPF_MAP_TYPE_STACK_TRACE) {
1022             string arg0 =
1023                 rewriter_.getRewrittenText(expansionRange(Call->getArg(0)->getSourceRange()));
1024             txt = "bcc_get_stackid(";
1025             txt += "bpf_pseudo_fd(1, " + fd + "), " + arg0;
1026             rewrite_end = GET_ENDLOC(Call->getArg(0));
1027             } else {
1028               error(GET_BEGINLOC(Call), "get_stackid only available on stacktrace maps");
1029               return false;
1030             }
1031         } else if (memb_name == "sock_map_update" || memb_name == "sock_hash_update") {
1032           string ctx = rewriter_.getRewrittenText(expansionRange(Call->getArg(0)->getSourceRange()));
1033           string keyp = rewriter_.getRewrittenText(expansionRange(Call->getArg(1)->getSourceRange()));
1034           string flag = rewriter_.getRewrittenText(expansionRange(Call->getArg(2)->getSourceRange()));
1035           txt = "bpf_" + string(memb_name) + "(" + ctx + ", " +
1036             "(void *)bpf_pseudo_fd(1, " + fd + "), " + keyp + ", " + flag + ");";
1037         } else if (memb_name == "ringbuf_output") {
1038           string name = string(Ref->getDecl()->getName());
1039           string args = rewriter_.getRewrittenText(expansionRange(SourceRange(GET_BEGINLOC(Call->getArg(0)),
1040                                                            GET_ENDLOC(Call->getArg(2)))));
1041           txt = "bpf_ringbuf_output((void *)bpf_pseudo_fd(1, " + fd + ")";
1042           txt += ", " + args + ")";
1043 
1044           // e.g.
1045           // struct data_t { u32 pid; }; data_t data;
1046           // events.ringbuf_output(&data, sizeof(data), 0);
1047           // ...
1048           //                       &data   ->     data    ->  typeof(data)        ->   data_t
1049           auto type_arg0 = Call->getArg(0)->IgnoreCasts()->getType().getTypePtr()->getPointeeType().getTypePtr();
1050           if (type_arg0->isStructureType()) {
1051             auto event_type = type_arg0->getAsTagDecl();
1052             const auto *r = dyn_cast<RecordDecl>(event_type);
1053             std::vector<std::string> perf_event;
1054 
1055             for (auto it = r->field_begin(); it != r->field_end(); ++it) {
1056               perf_event.push_back(it->getNameAsString() + "#" + it->getType().getAsString()); //"pid#u32"
1057             }
1058             fe_.perf_events_[name] = perf_event;
1059           }
1060         } else if (memb_name == "ringbuf_reserve") {
1061           string name = string(Ref->getDecl()->getName());
1062           string arg0 = rewriter_.getRewrittenText(expansionRange(Call->getArg(0)->getSourceRange()));
1063           txt = "bpf_ringbuf_reserve((void *)bpf_pseudo_fd(1, " + fd + ")";
1064           txt += ", " + arg0 + ", 0)"; // Flags in reserve are meaningless
1065         } else if (memb_name == "ringbuf_discard") {
1066           string name = string(Ref->getDecl()->getName());
1067           string args = rewriter_.getRewrittenText(expansionRange(SourceRange(GET_BEGINLOC(Call->getArg(0)),
1068                                                            GET_ENDLOC(Call->getArg(1)))));
1069           txt = "bpf_ringbuf_discard(" + args + ")";
1070         } else if (memb_name == "ringbuf_query") {
1071           string name = string(Ref->getDecl()->getName());
1072           string arg0 = rewriter_.getRewrittenText(expansionRange(Call->getArg(0)->getSourceRange()));
1073           txt = "bpf_ringbuf_query((void *)bpf_pseudo_fd(1, " + fd + ")";
1074           txt += ", " + arg0 + ")";
1075         } else if (memb_name == "ringbuf_submit") {
1076           string name = string(Ref->getDecl()->getName());
1077           string args = rewriter_.getRewrittenText(expansionRange(SourceRange(GET_BEGINLOC(Call->getArg(0)),
1078                                                            GET_ENDLOC(Call->getArg(1)))));
1079           txt = "bpf_ringbuf_submit(" + args + ")";
1080 
1081           // e.g.
1082           // struct data_t { u32 pid; };
1083           // data_t *data = events.ringbuf_reserve(sizeof(data_t));
1084           // events.ringbuf_submit(data, 0);
1085           // ...
1086           //                       &data   ->     data    ->  typeof(data)        ->   data_t
1087           auto type_arg0 = Call->getArg(0)->IgnoreCasts()->getType().getTypePtr()->getPointeeType().getTypePtr();
1088           if (type_arg0->isStructureType()) {
1089             auto event_type = type_arg0->getAsTagDecl();
1090             const auto *r = dyn_cast<RecordDecl>(event_type);
1091             std::vector<std::string> perf_event;
1092 
1093             for (auto it = r->field_begin(); it != r->field_end(); ++it) {
1094               perf_event.push_back(it->getNameAsString() + "#" + it->getType().getAsString()); //"pid#u32"
1095             }
1096             fe_.perf_events_[name] = perf_event;
1097           }
1098         } else if (memb_name == "msg_redirect_hash" || memb_name == "sk_redirect_hash") {
1099           string arg0 = rewriter_.getRewrittenText(expansionRange(Call->getArg(0)->getSourceRange()));
1100           string args_other = rewriter_.getRewrittenText(expansionRange(SourceRange(GET_BEGINLOC(Call->getArg(1)),
1101                                                            GET_ENDLOC(Call->getArg(2)))));
1102 
1103           txt = "bpf_" + string(memb_name) + "(" + arg0 + ", (void *)bpf_pseudo_fd(1, " + fd + "), ";
1104           txt += args_other + ")";
1105         } else {
1106           if (memb_name == "lookup") {
1107             prefix = "bpf_map_lookup_elem";
1108             suffix = ")";
1109           } else if (memb_name == "update") {
1110             prefix = "bpf_map_update_elem";
1111             suffix = ", BPF_ANY)";
1112           } else if (memb_name == "insert") {
1113             if (desc->second.type == BPF_MAP_TYPE_ARRAY) {
1114               warning(GET_BEGINLOC(Call), "all element of an array already exist; insert() will have no effect");
1115             }
1116             prefix = "bpf_map_update_elem";
1117             suffix = ", BPF_NOEXIST)";
1118           } else if (memb_name == "delete") {
1119             prefix = "bpf_map_delete_elem";
1120             suffix = ")";
1121           } else if (memb_name == "call") {
1122             prefix = "bpf_tail_call_";
1123             suffix = ")";
1124           } else if (memb_name == "perf_read") {
1125             prefix = "bpf_perf_event_read";
1126             suffix = ")";
1127           } else if (memb_name == "perf_counter_value") {
1128             prefix = "bpf_perf_event_read_value";
1129             suffix = ")";
1130           } else if (memb_name == "check_current_task") {
1131             prefix = "bpf_current_task_under_cgroup";
1132             suffix = ")";
1133           } else if (memb_name == "redirect_map") {
1134             prefix = "bpf_redirect_map";
1135             suffix = ")";
1136           } else if (memb_name == "sk_storage_get") {
1137             prefix = "bpf_sk_storage_get";
1138             suffix = ")";
1139           } else if (memb_name == "sk_storage_delete") {
1140             prefix = "bpf_sk_storage_delete";
1141             suffix = ")";
1142           } else if (memb_name == "inode_storage_get") {
1143             prefix = "bpf_inode_storage_get";
1144             suffix = ")";
1145           } else if (memb_name == "inode_storage_delete") {
1146             prefix = "bpf_inode_storage_delete";
1147             suffix = ")";
1148           } else if (memb_name == "task_storage_get") {
1149             prefix = "bpf_task_storage_get";
1150             suffix = ")";
1151           } else if (memb_name == "task_storage_delete") {
1152             prefix = "bpf_task_storage_delete";
1153             suffix = ")";
1154           } else if (memb_name == "get_local_storage") {
1155             prefix = "bpf_get_local_storage";
1156             suffix = ")";
1157           } else if (memb_name == "push") {
1158             prefix = "bpf_map_push_elem";
1159             suffix = ")";
1160           } else if (memb_name == "pop") {
1161             prefix = "bpf_map_pop_elem";
1162             suffix = ")";
1163           } else if (memb_name == "peek") {
1164             prefix = "bpf_map_peek_elem";
1165             suffix = ")";
1166            } else {
1167             error(GET_BEGINLOC(Call), "invalid bpf_table operation %0") << memb_name;
1168             return false;
1169           }
1170           prefix += "((void *)bpf_pseudo_fd(1, " + fd + "), ";
1171 
1172           txt = prefix + args + suffix;
1173         }
1174         if (!rewriter_.isRewritable(rewrite_start) || !rewriter_.isRewritable(rewrite_end)) {
1175           error(GET_BEGINLOC(Call), "cannot use map function inside a macro");
1176           return false;
1177         }
1178         rewriter_.ReplaceText(expansionRange(SourceRange(rewrite_start, rewrite_end)), txt);
1179         return true;
1180       }
1181     }
1182   } else if (Call->getCalleeDecl()) {
1183     NamedDecl *Decl = dyn_cast<NamedDecl>(Call->getCalleeDecl());
1184     if (!Decl) return true;
1185 
1186     string text;
1187 
1188     // Bail out when bpf_probe_read_user is unavailable for overlapping address
1189     // space arch.
1190     bool overlap_addr = false;
1191     std::string probe = check_bpf_probe_read_user(Decl->getName(),
1192                           overlap_addr);
1193     if (overlap_addr) {
1194       error(GET_BEGINLOC(Call), "bpf_probe_read_user not found. Use latest kernel");
1195       return false;
1196     }
1197 
1198     if (AsmLabelAttr *A = Decl->getAttr<AsmLabelAttr>()) {
1199       // Functions with the tag asm("llvm.bpf.extra") are implemented in the
1200       // rewriter rather than as a macro since they may also include nested
1201       // rewrites, and clang::Rewriter does not support rewrites in macros,
1202       // unless one preprocesses the entire source file.
1203       if (A->getLabel() == "llvm.bpf.extra") {
1204         if (!rewriter_.isRewritable(GET_BEGINLOC(Call))) {
1205           error(GET_BEGINLOC(Call), "cannot use builtin inside a macro");
1206           return false;
1207         }
1208 
1209         vector<string> args;
1210 
1211         for (auto arg : Call->arguments())
1212           args.push_back(rewriter_.getRewrittenText(expansionRange(arg->getSourceRange())));
1213 
1214         if (Decl->getName() == "incr_cksum_l3") {
1215           text = "bpf_l3_csum_replace_(" + fn_args_[0]->getName().str() + ", (u64)";
1216           text += args[0] + ", " + args[1] + ", " + args[2] + ", sizeof(" + args[2] + "))";
1217           rewriter_.ReplaceText(expansionRange(Call->getSourceRange()), text);
1218         } else if (Decl->getName() == "incr_cksum_l4") {
1219           text = "bpf_l4_csum_replace_(" + fn_args_[0]->getName().str() + ", (u64)";
1220           text += args[0] + ", " + args[1] + ", " + args[2];
1221           text += ", ((" + args[3] + " & 0x1) << 4) | sizeof(" + args[2] + "))";
1222           rewriter_.ReplaceText(expansionRange(Call->getSourceRange()), text);
1223         } else if (Decl->getName() == "bpf_trace_printk") {
1224           checkFormatSpecifiers(args[0], GET_BEGINLOC(Call->getArg(0)));
1225           //  #define bpf_trace_printk(fmt, args...)
1226           //    ({ char _fmt[] = fmt; bpf_trace_printk_(_fmt, sizeof(_fmt), args...); })
1227           text = "({ char _fmt[] = " + args[0] + "; bpf_trace_printk_(_fmt, sizeof(_fmt)";
1228           if (args.size() <= 1) {
1229             text += "); })";
1230             rewriter_.ReplaceText(expansionRange(Call->getSourceRange()), text);
1231           } else {
1232             rewriter_.ReplaceText(expansionRange(SourceRange(GET_BEGINLOC(Call), GET_ENDLOC(Call->getArg(0)))), text);
1233             rewriter_.InsertTextAfter(GET_ENDLOC(Call), "); }");
1234           }
1235         } else if (Decl->getName() == "bpf_num_cpus") {
1236           int numcpu = sysconf(_SC_NPROCESSORS_ONLN);
1237           if (numcpu <= 0)
1238             numcpu = 1;
1239           text = to_string(numcpu);
1240           rewriter_.ReplaceText(expansionRange(Call->getSourceRange()), text);
1241         } else if (Decl->getName() == "bpf_usdt_readarg_p") {
1242           text = "({ u64 __addr = 0x0; ";
1243           text += "_bpf_readarg_" + current_fn_ + "_" + args[0] + "(" +
1244                   args[1] + ", &__addr, sizeof(__addr));";
1245 
1246           bool overlap_addr = false;
1247           text += check_bpf_probe_read_user(StringRef("bpf_probe_read_user"),
1248                   overlap_addr);
1249           if (overlap_addr) {
1250             error(GET_BEGINLOC(Call), "bpf_probe_read_user not found. Use latest kernel");
1251             return false;
1252           }
1253 
1254           text += "(" + args[2] + ", " + args[3] + ", (void *)__addr);";
1255           text += "})";
1256           rewriter_.ReplaceText(expansionRange(Call->getSourceRange()), text);
1257         } else if (Decl->getName() == "bpf_usdt_readarg") {
1258           text = "_bpf_readarg_" + current_fn_ + "_" + args[0] + "(" + args[1] +
1259                  ", " + args[2] + ", sizeof(*(" + args[2] + ")))";
1260           rewriter_.ReplaceText(expansionRange(Call->getSourceRange()), text);
1261         }
1262       }
1263     } else if (FunctionDecl *F = dyn_cast<FunctionDecl>(Decl)) {
1264       if (F->isExternallyVisible() && !F->getBuiltinID()) {
1265         auto start_loc = rewriter_.getSourceMgr().getFileLoc(GET_BEGINLOC(Decl));
1266         if (rewriter_.getSourceMgr().getFileID(start_loc)
1267             == rewriter_.getSourceMgr().getMainFileID()) {
1268           error(GET_BEGINLOC(Call), "cannot call non-static helper function");
1269           return false;
1270         }
1271       }
1272     }
1273   }
1274   return true;
1275 }
1276 
checkFormatSpecifiers(const string & fmt,SourceLocation loc)1277 bool BTypeVisitor::checkFormatSpecifiers(const string& fmt, SourceLocation loc) {
1278   unsigned nb_specifiers = 0, i, j;
1279   bool has_s = false;
1280   for (i = 0; i < fmt.length(); i++) {
1281     if (!isascii(fmt[i]) || (!isprint(fmt[i]) && !isspace(fmt[i]))) {
1282       warning(loc.getLocWithOffset(i), "unrecognized character");
1283       return false;
1284     }
1285     if (fmt[i] != '%')
1286       continue;
1287     if (nb_specifiers >= 3) {
1288       warning(loc.getLocWithOffset(i), "cannot use more than 3 conversion specifiers");
1289       return false;
1290     }
1291     nb_specifiers++;
1292     i++;
1293     if (fmt[i] == 'l') {
1294       i++;
1295     } else if (fmt[i] == 'p' || fmt[i] == 's') {
1296       i++;
1297       if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0) {
1298         warning(loc.getLocWithOffset(i - 2),
1299                 "only %%d %%u %%x %%ld %%lu %%lx %%lld %%llu %%llx %%p %%s conversion specifiers allowed");
1300         return false;
1301       }
1302       if (fmt[i - 1] == 's') {
1303         if (has_s) {
1304           warning(loc.getLocWithOffset(i - 2), "cannot use several %%s conversion specifiers");
1305           return false;
1306         }
1307         has_s = true;
1308       }
1309       continue;
1310     }
1311     j = 1;
1312     if (fmt[i] == 'l') {
1313       i++;
1314       j++;
1315     }
1316     if (fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x') {
1317       warning(loc.getLocWithOffset(i - j),
1318               "only %%d %%u %%x %%ld %%lu %%lx %%lld %%llu %%llx %%p %%s conversion specifiers allowed");
1319       return false;
1320     }
1321   }
1322   return true;
1323 }
1324 
VisitBinaryOperator(BinaryOperator * E)1325 bool BTypeVisitor::VisitBinaryOperator(BinaryOperator *E) {
1326   if (!E->isAssignmentOp())
1327     return true;
1328   Expr *LHS = E->getLHS()->IgnoreImplicit();
1329   if (MemberExpr *Memb = dyn_cast<MemberExpr>(LHS)) {
1330     if (DeclRefExpr *Base = dyn_cast<DeclRefExpr>(Memb->getBase()->IgnoreImplicit())) {
1331       if (DeprecatedAttr *A = Base->getDecl()->getAttr<DeprecatedAttr>()) {
1332         if (A->getMessage() == "packet") {
1333           if (FieldDecl *F = dyn_cast<FieldDecl>(Memb->getMemberDecl())) {
1334             if (!rewriter_.isRewritable(GET_BEGINLOC(E))) {
1335               error(GET_BEGINLOC(E), "cannot use \"packet\" header type inside a macro");
1336               return false;
1337             }
1338 
1339             auto EndLoc = GET_ENDLOC(E);
1340             if (EndLoc.isMacroID()) {
1341               error(EndLoc, "cannot have macro at the end of expresssion, "
1342                             "workaround: put perentheses around macro \"(MARCO)\"");
1343               return false;
1344             }
1345 
1346             uint64_t ofs = C.getFieldOffset(F);
1347             uint64_t sz = F->isBitField() ? F->getBitWidthValue(C) : C.getTypeSize(F->getType());
1348             string base = rewriter_.getRewrittenText(expansionRange(Base->getSourceRange()));
1349             string text = "bpf_dins_pkt(" + fn_args_[0]->getName().str() + ", (u64)" + base + "+" + to_string(ofs >> 3)
1350                 + ", " + to_string(ofs & 0x7) + ", " + to_string(sz) + ",";
1351             rewriter_.ReplaceText(expansionRange(SourceRange(GET_BEGINLOC(E), E->getOperatorLoc())), text);
1352             rewriter_.InsertTextAfterToken(EndLoc, ")");
1353           }
1354         }
1355       }
1356     }
1357   }
1358   return true;
1359 }
VisitImplicitCastExpr(ImplicitCastExpr * E)1360 bool BTypeVisitor::VisitImplicitCastExpr(ImplicitCastExpr *E) {
1361   // use dext only for RValues
1362   if (E->getCastKind() != CK_LValueToRValue)
1363     return true;
1364   MemberExpr *Memb = dyn_cast<MemberExpr>(E->IgnoreImplicit());
1365   if (!Memb)
1366     return true;
1367   Expr *Base = Memb->getBase()->IgnoreImplicit();
1368   if (DeclRefExpr *Ref = dyn_cast<DeclRefExpr>(Base)) {
1369     if (DeprecatedAttr *A = Ref->getDecl()->getAttr<DeprecatedAttr>()) {
1370       if (A->getMessage() == "packet") {
1371         if (FieldDecl *F = dyn_cast<FieldDecl>(Memb->getMemberDecl())) {
1372           if (!rewriter_.isRewritable(GET_BEGINLOC(E))) {
1373             error(GET_BEGINLOC(E), "cannot use \"packet\" header type inside a macro");
1374             return false;
1375           }
1376           uint64_t ofs = C.getFieldOffset(F);
1377           uint64_t sz = F->isBitField() ? F->getBitWidthValue(C) : C.getTypeSize(F->getType());
1378           string text = "bpf_dext_pkt(" + fn_args_[0]->getName().str() + ", (u64)" + Ref->getDecl()->getName().str() + "+"
1379               + to_string(ofs >> 3) + ", " + to_string(ofs & 0x7) + ", " + to_string(sz) + ")";
1380           rewriter_.ReplaceText(expansionRange(E->getSourceRange()), text);
1381         }
1382       }
1383     }
1384   }
1385   return true;
1386 }
1387 
1388 SourceRange
expansionRange(SourceRange range)1389 BTypeVisitor::expansionRange(SourceRange range) {
1390 #if LLVM_VERSION_MAJOR >= 7
1391   return rewriter_.getSourceMgr().getExpansionRange(range).getAsRange();
1392 #else
1393   return rewriter_.getSourceMgr().getExpansionRange(range);
1394 #endif
1395 }
1396 
1397 template <unsigned N>
error(SourceLocation loc,const char (& fmt)[N])1398 DiagnosticBuilder BTypeVisitor::error(SourceLocation loc, const char (&fmt)[N]) {
1399   unsigned int diag_id = C.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, fmt);
1400   return C.getDiagnostics().Report(loc, diag_id);
1401 }
1402 
1403 template <unsigned N>
warning(SourceLocation loc,const char (& fmt)[N])1404 DiagnosticBuilder BTypeVisitor::warning(SourceLocation loc, const char (&fmt)[N]) {
1405   unsigned int diag_id = C.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Warning, fmt);
1406   return C.getDiagnostics().Report(loc, diag_id);
1407 }
1408 
getFieldValue(VarDecl * Decl,FieldDecl * FDecl,int64_t OrigFValue)1409 int64_t BTypeVisitor::getFieldValue(VarDecl *Decl, FieldDecl *FDecl, int64_t OrigFValue) {
1410   unsigned idx = FDecl->getFieldIndex();
1411 
1412   if (auto I = dyn_cast_or_null<InitListExpr>(Decl->getInit())) {
1413 #if LLVM_VERSION_MAJOR >= 8
1414     Expr::EvalResult res;
1415     if (I->getInit(idx)->EvaluateAsInt(res, C)) {
1416       return res.Val.getInt().getExtValue();
1417     }
1418 #else
1419     llvm::APSInt res;
1420     if (I->getInit(idx)->EvaluateAsInt(res, C)) {
1421       return res.getExtValue();
1422     }
1423 #endif
1424   }
1425 
1426   return OrigFValue;
1427 }
1428 
1429 // Open table FDs when bpf tables (as denoted by section("maps*") attribute)
1430 // are declared.
VisitVarDecl(VarDecl * Decl)1431 bool BTypeVisitor::VisitVarDecl(VarDecl *Decl) {
1432   const RecordType *R = Decl->getType()->getAs<RecordType>();
1433   if (SectionAttr *A = Decl->getAttr<SectionAttr>()) {
1434     if (!A->getName().startswith("maps"))
1435       return true;
1436     if (!R) {
1437       error(GET_ENDLOC(Decl), "invalid type for bpf_table, expect struct");
1438       return false;
1439     }
1440     const RecordDecl *RD = R->getDecl()->getDefinition();
1441 
1442     TableDesc table;
1443     TableStorage::iterator table_it;
1444     table.name = string(Decl->getName());
1445     Path local_path({fe_.id(), table.name});
1446     Path maps_ns_path({"ns", fe_.maps_ns(), table.name});
1447     Path global_path({table.name});
1448     QualType key_type, leaf_type;
1449 
1450     unsigned i = 0;
1451     for (auto F : RD->fields()) {
1452       if (F->getType().getTypePtr()->isIncompleteType()) {
1453         error(GET_BEGINLOC(F), "unknown type");
1454         return false;
1455       }
1456 
1457       size_t sz = C.getTypeSize(F->getType()) >> 3;
1458       if (F->getName() == "key") {
1459         if (sz == 0) {
1460           error(GET_BEGINLOC(F), "invalid zero-sized leaf");
1461           return false;
1462         }
1463         table.key_size = sz;
1464         key_type = F->getType();
1465       } else if (F->getName() == "leaf") {
1466         if (sz == 0) {
1467           error(GET_BEGINLOC(F), "invalid zero-sized leaf");
1468           return false;
1469         }
1470         table.leaf_size = sz;
1471         leaf_type = F->getType();
1472       } else if (F->getName() == "max_entries") {
1473             table.max_entries = getFieldValue(Decl, F, table.max_entries);
1474       } else if (F->getName() == "flags") {
1475             table.flags = getFieldValue(Decl, F, table.flags);
1476       }
1477       ++i;
1478     }
1479 
1480     std::string section_attr = string(A->getName()), pinned;
1481     size_t pinned_path_pos = section_attr.find(":");
1482     // 0 is not a valid map ID, -1 is to create and pin it to file
1483     int pinned_id = 0;
1484 
1485     if (pinned_path_pos != std::string::npos) {
1486       pinned = section_attr.substr(pinned_path_pos + 1);
1487       section_attr = section_attr.substr(0, pinned_path_pos);
1488       int fd = bpf_obj_get(pinned.c_str());
1489       if (fd < 0) {
1490         if (bcc_make_parent_dir(pinned.c_str()) ||
1491             bcc_check_bpffs_path(pinned.c_str())) {
1492           return false;
1493         }
1494 
1495         pinned_id = -1;
1496       } else {
1497         struct bpf_map_info info = {};
1498         unsigned int info_len = sizeof(info);
1499 
1500         if (bpf_obj_get_info_by_fd(fd, &info, &info_len)) {
1501           error(GET_BEGINLOC(Decl), "get map info failed: %0")
1502                 << strerror(errno);
1503           return false;
1504         }
1505 
1506         pinned_id = info.id;
1507       }
1508 
1509       close(fd);
1510     }
1511 
1512     // Additional map specific information
1513     size_t map_info_pos = section_attr.find("$");
1514     std::string inner_map_name;
1515 
1516     if (map_info_pos != std::string::npos) {
1517       std::string map_info = section_attr.substr(map_info_pos + 1);
1518       section_attr = section_attr.substr(0, map_info_pos);
1519       if (section_attr == "maps/array_of_maps" ||
1520           section_attr == "maps/hash_of_maps") {
1521         inner_map_name = map_info;
1522       }
1523     }
1524 
1525     bpf_map_type map_type = BPF_MAP_TYPE_UNSPEC;
1526     if (section_attr == "maps/hash") {
1527       map_type = BPF_MAP_TYPE_HASH;
1528     } else if (section_attr == "maps/array") {
1529       map_type = BPF_MAP_TYPE_ARRAY;
1530     } else if (section_attr == "maps/percpu_hash") {
1531       map_type = BPF_MAP_TYPE_PERCPU_HASH;
1532     } else if (section_attr == "maps/percpu_array") {
1533       map_type = BPF_MAP_TYPE_PERCPU_ARRAY;
1534     } else if (section_attr == "maps/lru_hash") {
1535       map_type = BPF_MAP_TYPE_LRU_HASH;
1536     } else if (section_attr == "maps/lru_percpu_hash") {
1537       map_type = BPF_MAP_TYPE_LRU_PERCPU_HASH;
1538     } else if (section_attr == "maps/lpm_trie") {
1539       map_type = BPF_MAP_TYPE_LPM_TRIE;
1540     } else if (section_attr == "maps/histogram") {
1541       map_type = BPF_MAP_TYPE_HASH;
1542       if (key_type->isSpecificBuiltinType(BuiltinType::Int))
1543         map_type = BPF_MAP_TYPE_ARRAY;
1544       if (!leaf_type->isSpecificBuiltinType(BuiltinType::ULongLong))
1545         error(GET_BEGINLOC(Decl), "histogram leaf type must be u64, got %0") << leaf_type;
1546     } else if (section_attr == "maps/prog") {
1547       map_type = BPF_MAP_TYPE_PROG_ARRAY;
1548     } else if (section_attr == "maps/perf_output") {
1549       map_type = BPF_MAP_TYPE_PERF_EVENT_ARRAY;
1550       int numcpu = get_possible_cpus().size();
1551       if (numcpu <= 0)
1552         numcpu = 1;
1553       table.max_entries = numcpu;
1554     } else if (section_attr == "maps/ringbuf") {
1555       map_type = BPF_MAP_TYPE_RINGBUF;
1556       // values from libbpf/src/libbpf_probes.c
1557       table.key_size = 0;
1558       table.leaf_size = 0;
1559     } else if (section_attr == "maps/perf_array") {
1560       map_type = BPF_MAP_TYPE_PERF_EVENT_ARRAY;
1561     } else if (section_attr == "maps/queue") {
1562       table.key_size = 0;
1563       map_type = BPF_MAP_TYPE_QUEUE;
1564     } else if (section_attr == "maps/stack") {
1565       table.key_size = 0;
1566       map_type = BPF_MAP_TYPE_STACK;
1567     } else if (section_attr == "maps/cgroup_array") {
1568       map_type = BPF_MAP_TYPE_CGROUP_ARRAY;
1569     } else if (section_attr == "maps/stacktrace") {
1570       map_type = BPF_MAP_TYPE_STACK_TRACE;
1571     } else if (section_attr == "maps/devmap") {
1572       map_type = BPF_MAP_TYPE_DEVMAP;
1573     } else if (section_attr == "maps/cpumap") {
1574       map_type = BPF_MAP_TYPE_CPUMAP;
1575     } else if (section_attr == "maps/xskmap") {
1576       map_type = BPF_MAP_TYPE_XSKMAP;
1577     } else if (section_attr == "maps/hash_of_maps") {
1578       map_type = BPF_MAP_TYPE_HASH_OF_MAPS;
1579     } else if (section_attr == "maps/array_of_maps") {
1580       map_type = BPF_MAP_TYPE_ARRAY_OF_MAPS;
1581     } else if (section_attr == "maps/sk_storage") {
1582       map_type = BPF_MAP_TYPE_SK_STORAGE;
1583     } else if (section_attr == "maps/inode_storage") {
1584       map_type = BPF_MAP_TYPE_INODE_STORAGE;
1585     } else if (section_attr == "maps/task_storage") {
1586       map_type = BPF_MAP_TYPE_TASK_STORAGE;
1587     } else if (section_attr == "maps/sockmap") {
1588       map_type = BPF_MAP_TYPE_SOCKMAP;
1589     } else if (section_attr == "maps/sockhash") {
1590       map_type = BPF_MAP_TYPE_SOCKHASH;
1591     } else if (section_attr == "maps/cgroup_storage") {
1592       map_type = BPF_MAP_TYPE_CGROUP_STORAGE;
1593     } else if (section_attr == "maps/percpu_cgroup_storage") {
1594       map_type = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE;
1595     } else if (section_attr == "maps/extern") {
1596       if (!fe_.table_storage().Find(maps_ns_path, table_it)) {
1597         if (!fe_.table_storage().Find(global_path, table_it)) {
1598           error(GET_BEGINLOC(Decl), "reference to undefined table");
1599           return false;
1600         }
1601       }
1602       table = table_it->second.dup();
1603       table.is_extern = true;
1604     } else if (section_attr == "maps/export") {
1605       if (table.name.substr(0, 2) == "__")
1606         table.name = table.name.substr(2);
1607       Path local_path({fe_.id(), table.name});
1608       Path global_path({table.name});
1609       if (!fe_.table_storage().Find(local_path, table_it)) {
1610         error(GET_BEGINLOC(Decl), "reference to undefined table");
1611         return false;
1612       }
1613       fe_.table_storage().Insert(global_path, table_it->second.dup());
1614       return true;
1615     } else if(section_attr == "maps/shared") {
1616       if (table.name.substr(0, 2) == "__")
1617         table.name = table.name.substr(2);
1618       Path local_path({fe_.id(), table.name});
1619       Path maps_ns_path({"ns", fe_.maps_ns(), table.name});
1620       if (!fe_.table_storage().Find(local_path, table_it)) {
1621         error(GET_BEGINLOC(Decl), "reference to undefined table");
1622         return false;
1623       }
1624       fe_.table_storage().Insert(maps_ns_path, table_it->second.dup());
1625       return true;
1626     }
1627 
1628     if (!table.is_extern) {
1629       if (map_type == BPF_MAP_TYPE_UNSPEC) {
1630         error(GET_BEGINLOC(Decl), "unsupported map type: %0") << section_attr;
1631         return false;
1632       }
1633 
1634       table.type = map_type;
1635       table.fake_fd = fe_.get_next_fake_fd();
1636       fe_.add_map_def(table.fake_fd, std::make_tuple((int)map_type, std::string(table.name),
1637                       (int)table.key_size, (int)table.leaf_size,
1638                       (int)table.max_entries, table.flags, pinned_id,
1639                       inner_map_name, pinned));
1640     }
1641 
1642     if (!table.is_extern)
1643       fe_.table_storage().VisitMapType(table, C, key_type, leaf_type);
1644     fe_.table_storage().Insert(local_path, move(table));
1645   } else if (const PointerType *P = Decl->getType()->getAs<PointerType>()) {
1646     // if var is a pointer to a packet type, clone the annotation into the var
1647     // decl so that the packet dext/dins rewriter can catch it
1648     if (const RecordType *RT = P->getPointeeType()->getAs<RecordType>()) {
1649       if (const RecordDecl *RD = RT->getDecl()->getDefinition()) {
1650         if (DeprecatedAttr *DA = RD->getAttr<DeprecatedAttr>()) {
1651           if (DA->getMessage() == "packet") {
1652             Decl->addAttr(DA->clone(C));
1653           }
1654         }
1655       }
1656     }
1657   }
1658   return true;
1659 }
1660 
1661 // First traversal of AST to retrieve maps with external pointers.
BTypeConsumer(ASTContext & C,BFrontendAction & fe,Rewriter & rewriter,set<Decl * > & m)1662 BTypeConsumer::BTypeConsumer(ASTContext &C, BFrontendAction &fe,
1663                              Rewriter &rewriter, set<Decl *> &m)
1664     : fe_(fe),
1665       map_visitor_(m),
1666       btype_visitor_(C, fe),
1667       probe_visitor1_(C, rewriter, m, true),
1668       probe_visitor2_(C, rewriter, m, false) {}
1669 
HandleTranslationUnit(ASTContext & Context)1670 void BTypeConsumer::HandleTranslationUnit(ASTContext &Context) {
1671   DeclContext::decl_iterator it;
1672   DeclContext *DC = TranslationUnitDecl::castToDeclContext(Context.getTranslationUnitDecl());
1673 
1674   /**
1675    * In a first traversal, ProbeVisitor tracks external pointers identified
1676    * through each function's arguments and replaces their dereferences with
1677    * calls to bpf_probe_read. It also passes all identified pointers to
1678    * external addresses to MapVisitor.
1679    */
1680   for (it = DC->decls_begin(); it != DC->decls_end(); it++) {
1681     Decl *D = *it;
1682     if (FunctionDecl *F = dyn_cast<FunctionDecl>(D)) {
1683       if (fe_.is_rewritable_ext_func(F)) {
1684         for (auto arg : F->parameters()) {
1685           if (arg == F->getParamDecl(0)) {
1686             /**
1687              * Limit tracing of pointers from context to tracing contexts.
1688              * We're whitelisting instead of blacklisting to avoid issues with
1689              * existing programs if new context types are added in the future.
1690              */
1691             string type = arg->getType().getAsString();
1692             if (type == "struct pt_regs *" ||
1693                 type == "struct bpf_raw_tracepoint_args *" ||
1694                 type.substr(0, 19) == "struct tracepoint__")
1695               probe_visitor1_.set_ctx(arg);
1696           } else if (!arg->getType()->isFundamentalType()) {
1697             tuple<Decl *, int> pt = make_tuple(arg, 0);
1698             probe_visitor1_.set_ptreg(pt);
1699           }
1700         }
1701 
1702         probe_visitor1_.TraverseDecl(D);
1703         for (auto ptreg : probe_visitor1_.get_ptregs()) {
1704           map_visitor_.set_ptreg(ptreg);
1705         }
1706       }
1707     }
1708   }
1709 
1710   /**
1711    * MapVisitor uses external pointers identified by the first ProbeVisitor
1712    * traversal to identify all maps with external pointers as values.
1713    * MapVisitor runs only after ProbeVisitor finished its traversal of the
1714    * whole translation unit to clearly separate the role of each ProbeVisitor's
1715    * traversal: the first tracks external pointers from function arguments,
1716    * whereas the second tracks external pointers from maps. Without this clear
1717    * separation, ProbeVisitor might attempt to replace several times the same
1718    * dereferences.
1719    */
1720   for (it = DC->decls_begin(); it != DC->decls_end(); it++) {
1721     Decl *D = *it;
1722     if (FunctionDecl *F = dyn_cast<FunctionDecl>(D)) {
1723       if (fe_.is_rewritable_ext_func(F)) {
1724         map_visitor_.TraverseDecl(D);
1725       }
1726     }
1727   }
1728 
1729   /**
1730    * In a second traversal, ProbeVisitor tracks pointers passed through the
1731    * maps identified by MapVisitor and replaces their dereferences with calls
1732    * to bpf_probe_read.
1733    * This last traversal runs after MapVisitor went through an entire
1734    * translation unit, to ensure maps with external pointers have all been
1735    * identified.
1736    */
1737   for (it = DC->decls_begin(); it != DC->decls_end(); it++) {
1738     Decl *D = *it;
1739     if (FunctionDecl *F = dyn_cast<FunctionDecl>(D)) {
1740       if (fe_.is_rewritable_ext_func(F)) {
1741         probe_visitor2_.TraverseDecl(D);
1742       }
1743     }
1744 
1745     btype_visitor_.TraverseDecl(D);
1746   }
1747 
1748 }
1749 
BFrontendAction(llvm::raw_ostream & os,unsigned flags,TableStorage & ts,const std::string & id,const std::string & main_path,ProgFuncInfo & prog_func_info,std::string & mod_src,const std::string & maps_ns,fake_fd_map_def & fake_fd_map,std::map<std::string,std::vector<std::string>> & perf_events)1750 BFrontendAction::BFrontendAction(
1751     llvm::raw_ostream &os, unsigned flags, TableStorage &ts,
1752     const std::string &id, const std::string &main_path,
1753     ProgFuncInfo &prog_func_info, std::string &mod_src,
1754     const std::string &maps_ns, fake_fd_map_def &fake_fd_map,
1755     std::map<std::string, std::vector<std::string>> &perf_events)
1756     : os_(os),
1757       flags_(flags),
1758       ts_(ts),
1759       id_(id),
1760       maps_ns_(maps_ns),
1761       rewriter_(new Rewriter),
1762       main_path_(main_path),
1763       prog_func_info_(prog_func_info),
1764       mod_src_(mod_src),
1765       next_fake_fd_(-1),
1766       fake_fd_map_(fake_fd_map),
1767       perf_events_(perf_events) {}
1768 
is_rewritable_ext_func(FunctionDecl * D)1769 bool BFrontendAction::is_rewritable_ext_func(FunctionDecl *D) {
1770   StringRef file_name = rewriter_->getSourceMgr().getFilename(GET_BEGINLOC(D));
1771   return (D->isExternallyVisible() && D->hasBody() &&
1772           (file_name.empty() || file_name == main_path_));
1773 }
1774 
DoMiscWorkAround()1775 void BFrontendAction::DoMiscWorkAround() {
1776   // In 4.16 and later, CONFIG_CC_STACKPROTECTOR is moved out of Kconfig and into
1777   // Makefile. It will be set depending on CONFIG_CC_STACKPROTECTOR_{AUTO|REGULAR|STRONG}.
1778   // CONFIG_CC_STACKPROTECTOR is still used in various places, e.g., struct task_struct,
1779   // to guard certain fields. The workaround here intends to define
1780   // CONFIG_CC_STACKPROTECTOR properly based on other configs, so it relieved any bpf
1781   // program (using task_struct, etc.) of patching the below code.
1782   std::string probefunc = check_bpf_probe_read_kernel();
1783   if (kresolver) {
1784     bcc_free_symcache(kresolver, -1);
1785     kresolver = NULL;
1786   }
1787   if (probefunc == "bpf_probe_read") {
1788     probefunc = "#define bpf_probe_read_kernel bpf_probe_read\n"
1789       "#define bpf_probe_read_kernel_str bpf_probe_read_str\n"
1790       "#define bpf_probe_read_user bpf_probe_read\n"
1791       "#define bpf_probe_read_user_str bpf_probe_read_str\n";
1792   }
1793   else {
1794     probefunc = "";
1795   }
1796   std::string prologue = "#if defined(BPF_LICENSE)\n"
1797     "#error BPF_LICENSE cannot be specified through cflags\n"
1798     "#endif\n"
1799     "#if !defined(CONFIG_CC_STACKPROTECTOR)\n"
1800     "#if defined(CONFIG_CC_STACKPROTECTOR_AUTO) \\\n"
1801     "    || defined(CONFIG_CC_STACKPROTECTOR_REGULAR) \\\n"
1802     "    || defined(CONFIG_CC_STACKPROTECTOR_STRONG)\n"
1803     "#define CONFIG_CC_STACKPROTECTOR\n"
1804     "#endif\n"
1805     "#endif\n";
1806   prologue = prologue + probefunc;
1807   rewriter_->getEditBuffer(rewriter_->getSourceMgr().getMainFileID()).InsertText(0,
1808     prologue,
1809     false);
1810 
1811   rewriter_->getEditBuffer(rewriter_->getSourceMgr().getMainFileID()).InsertTextAfter(
1812 #if LLVM_VERSION_MAJOR >= 12
1813     rewriter_->getSourceMgr().getBufferOrFake(rewriter_->getSourceMgr().getMainFileID()).getBufferSize(),
1814 #else
1815     rewriter_->getSourceMgr().getBuffer(rewriter_->getSourceMgr().getMainFileID())->getBufferSize(),
1816 #endif
1817     "\n#include <bcc/footer.h>\n");
1818 }
1819 
EndSourceFileAction()1820 void BFrontendAction::EndSourceFileAction() {
1821   // Additional misc rewrites
1822   DoMiscWorkAround();
1823 
1824   if (flags_ & DEBUG_PREPROCESSOR)
1825     rewriter_->getEditBuffer(rewriter_->getSourceMgr().getMainFileID()).write(llvm::errs());
1826 #if LLVM_VERSION_MAJOR >= 9
1827   llvm::raw_string_ostream tmp_os(mod_src_);
1828   rewriter_->getEditBuffer(rewriter_->getSourceMgr().getMainFileID())
1829       .write(tmp_os);
1830 #else
1831   if (flags_ & DEBUG_SOURCE) {
1832     llvm::raw_string_ostream tmp_os(mod_src_);
1833     rewriter_->getEditBuffer(rewriter_->getSourceMgr().getMainFileID())
1834         .write(tmp_os);
1835   }
1836 #endif
1837 
1838   for (auto func : func_range_) {
1839     auto f = func.first;
1840     string bd = rewriter_->getRewrittenText(func_range_[f]);
1841     auto fn = prog_func_info_.get_func(f);
1842     if (fn)
1843       fn->src_rewritten_ = bd;
1844   }
1845   rewriter_->getEditBuffer(rewriter_->getSourceMgr().getMainFileID()).write(os_);
1846   os_.flush();
1847 }
1848 
CreateASTConsumer(CompilerInstance & Compiler,llvm::StringRef InFile)1849 unique_ptr<ASTConsumer> BFrontendAction::CreateASTConsumer(CompilerInstance &Compiler, llvm::StringRef InFile) {
1850   rewriter_->setSourceMgr(Compiler.getSourceManager(), Compiler.getLangOpts());
1851   vector<unique_ptr<ASTConsumer>> consumers;
1852   consumers.push_back(unique_ptr<ASTConsumer>(new BTypeConsumer(Compiler.getASTContext(), *this, *rewriter_, m_)));
1853   return unique_ptr<ASTConsumer>(new MultiplexConsumer(std::move(consumers)));
1854 }
1855 
1856 }
1857