xref: /aosp_15_r20/external/bcc/src/cc/bpf_module_rw_engine.cc (revision 387f9dfdfa2baef462e92476d413c7bc2470293e)
1 /*
2  * Copyright (c) 2015 PLUMgrid, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include <map>
17 #include <string>
18 #include <vector>
19 
20 #include <llvm/ExecutionEngine/MCJIT.h>
21 #include <llvm/IR/IRBuilder.h>
22 #include <llvm/Support/TargetSelect.h>
23 
24 #include "common.h"
25 #include "bpf_module.h"
26 #include "table_storage.h"
27 
28 namespace ebpf {
29 
30 using std::map;
31 using std::move;
32 using std::string;
33 using std::unique_ptr;
34 using std::vector;
35 using namespace llvm;
36 
// Tells callers whether the read/write (sscanf/snprintf) JIT engine is
// compiled in. This implementation unconditionally reports that it is.
bool bpf_module_rw_engine_enabled(void) { return true; }
40 
initialize_rw_engine()41 void BPFModule::initialize_rw_engine() {
42   InitializeNativeTarget();
43   InitializeNativeTargetAsmPrinter();
44 }
45 
cleanup_rw_engine()46 void BPFModule::cleanup_rw_engine() {
47   rw_engine_.reset();
48 }
49 
createLoad(IRBuilder<> & B,Value * addr,bool isVolatile=false)50 static LoadInst *createLoad(IRBuilder<> &B, Value *addr, bool isVolatile = false)
51 {
52 #if LLVM_VERSION_MAJOR >= 15
53   if (isa<AllocaInst>(addr))
54     return B.CreateLoad(dyn_cast<AllocaInst>(addr)->getAllocatedType(), addr, isVolatile);
55   else
56     return B.CreateLoad(addr->getType(), addr, isVolatile);
57 #elif LLVM_VERSION_MAJOR >= 13
58   return B.CreateLoad(addr->getType()->getPointerElementType(), addr, isVolatile);
59 #else
60   return B.CreateLoad(addr, isVolatile);
61 #endif
62 }
63 
createInBoundsGEP(IRBuilder<> & B,Value * ptr,ArrayRef<Value * > idxlist)64 static Value *createInBoundsGEP(IRBuilder<> &B, Value *ptr, ArrayRef<Value *>idxlist)
65 {
66 #if LLVM_VERSION_MAJOR >= 15
67   if (isa<GlobalValue>(ptr))
68     return B.CreateInBoundsGEP(dyn_cast<GlobalValue>(ptr)->getValueType(), ptr, idxlist);
69   else
70     return B.CreateInBoundsGEP(ptr->getType(), ptr, idxlist);
71 #elif LLVM_VERSION_MAJOR >= 13
72   return B.CreateInBoundsGEP(ptr->getType()->getScalarType()->getPointerElementType(),
73                              ptr, idxlist);
74 #else
75   return B.CreateInBoundsGEP(ptr, idxlist);
76 #endif
77 }
78 
debug_printf(Module * mod,IRBuilder<> & B,const string & fmt,vector<Value * > args)79 static void debug_printf(Module *mod, IRBuilder<> &B, const string &fmt, vector<Value *> args) {
80   GlobalVariable *fmt_gvar = B.CreateGlobalString(fmt, "fmt");
81   args.insert(args.begin(), createInBoundsGEP(B, fmt_gvar, vector<Value *>({B.getInt64(0), B.getInt64(0)})));
82   args.insert(args.begin(), B.getInt64((uintptr_t)stderr));
83   Function *fprintf_fn = mod->getFunction("fprintf");
84   if (!fprintf_fn) {
85     vector<Type *> fprintf_fn_args({B.getInt64Ty(), B.getInt8PtrTy()});
86     FunctionType *fprintf_fn_type = FunctionType::get(B.getInt32Ty(), fprintf_fn_args, /*isvarArg=*/true);
87     fprintf_fn = Function::Create(fprintf_fn_type, GlobalValue::ExternalLinkage, "fprintf", mod);
88     fprintf_fn->setCallingConv(CallingConv::C);
89     fprintf_fn->addFnAttr(Attribute::NoUnwind);
90   }
91   B.CreateCall(fprintf_fn, args);
92 }
93 
finish_sscanf(IRBuilder<> & B,vector<Value * > * args,string * fmt,const map<string,Value * > & locals,bool exact_args)94 static void finish_sscanf(IRBuilder<> &B, vector<Value *> *args, string *fmt,
95                           const map<string, Value *> &locals, bool exact_args) {
96   // fmt += "%n";
97   // int nread = 0;
98   // int n = sscanf(s, fmt, args..., &nread);
99   // if (n < 0) return -1;
100   // s = &s[nread];
101   Value *sptr = locals.at("sptr");
102   Value *nread = locals.at("nread");
103   Function *cur_fn = B.GetInsertBlock()->getParent();
104   Function *sscanf_fn = B.GetInsertBlock()->getModule()->getFunction("sscanf");
105   *fmt += "%n";
106   B.CreateStore(B.getInt32(0), nread);
107   GlobalVariable *fmt_gvar = B.CreateGlobalString(*fmt, "fmt");
108   (*args)[1] = createInBoundsGEP(B, fmt_gvar, {B.getInt64(0), B.getInt64(0)});
109   (*args)[0] = createLoad(B, sptr);
110   args->push_back(nread);
111   CallInst *call = B.CreateCall(sscanf_fn, *args);
112   call->setTailCall(true);
113 
114   BasicBlock *label_true = BasicBlock::Create(B.getContext(), "", cur_fn);
115   BasicBlock *label_false = BasicBlock::Create(B.getContext(), "", cur_fn);
116 
117   // exact_args means fail if don't consume exact number of "%" inputs
118   // exact_args is disabled for string parsing (empty case)
119   Value *cond = exact_args ? B.CreateICmpNE(call, B.getInt32(args->size() - 3))
120                            : B.CreateICmpSLT(call, B.getInt32(0));
121   B.CreateCondBr(cond, label_true, label_false);
122 
123   B.SetInsertPoint(label_true);
124   B.CreateRet(B.getInt32(-1));
125 
126   B.SetInsertPoint(label_false);
127   // s = &s[nread];
128   B.CreateStore(
129       createInBoundsGEP(B, createLoad(B, sptr), {createLoad(B, nread, true)}), sptr);
130 
131   args->resize(2);
132   fmt->clear();
133 }
134 
135 // recursive helper to capture the arguments
parse_type(IRBuilder<> & B,vector<Value * > * args,string * fmt,Type * type,Value * out,const map<string,Value * > & locals,bool is_writer)136 static void parse_type(IRBuilder<> &B, vector<Value *> *args, string *fmt,
137                        Type *type, Value *out,
138                        const map<string, Value *> &locals, bool is_writer) {
139   if (StructType *st = dyn_cast<StructType>(type)) {
140     *fmt += "{ ";
141     unsigned idx = 0;
142     for (auto field : st->elements()) {
143       parse_type(B, args, fmt, field, B.CreateStructGEP(type, out, idx++),
144                  locals, is_writer);
145       *fmt += " ";
146     }
147     *fmt += "}";
148   } else if (ArrayType *at = dyn_cast<ArrayType>(type)) {
149     if (at->getElementType() == B.getInt8Ty()) {
150       // treat i8[] as a char string instead of as an array of u8's
151       if (is_writer) {
152         *fmt += "\"%s\"";
153         args->push_back(out);
154       } else {
155         // When reading strings, scanf doesn't support empty "", so we need to
156         // break this up into multiple scanf calls. To understand it, let's take
157         // an example:
158         // struct Event {
159         //   u32 a;
160         //   struct {
161         //     char x[64];
162         //     int y;
163         //   } b[2];
164         //   u32 c;
165         // };
166         // The writer string would look like:
167         //  "{ 0x%x [ { \"%s\" 0x%x } { \"%s\" 0x%x } ] 0x%x }"
168         // But the reader string needs to restart at each \"\".
169         //  reader0(const char *s, struct Event *val) {
170         //    int nread, rc;
171         //    nread = 0;
172         //    rc = sscanf(s, "{ %i [ { \"%n", &val->a, &nread);
173         //    if (rc != 1) return -1;
174         //    s += nread; nread = 0;
175         //    rc = sscanf(s, "%[^\"]%n", &val->b[0].x, &nread);
176         //    if (rc < 0) return -1;
177         //    s += nread; nread = 0;
178         //    rc = sscanf(s, "\" %i } { \"%n", &val->b[0].y, &nread);
179         //    if (rc != 1) return -1;
180         //    s += nread; nread = 0;
181         //    rc = sscanf(s, "%[^\"]%n", &val->b[1].x, &nread);
182         //    if (rc < 0) return -1;
183         //    s += nread; nread = 0;
184         //    rc = sscanf(s, "\" %i } ] %i }%n", &val->b[1].y, &val->c, &nread);
185         //    if (rc != 2) return -1;
186         //    s += nread; nread = 0;
187         //    return 0;
188         //  }
189         *fmt += "\"";
190         finish_sscanf(B, args, fmt, locals, true);
191 
192         *fmt = "%[^\"]";
193         args->push_back(out);
194         finish_sscanf(B, args, fmt, locals, false);
195 
196         *fmt = "\"";
197       }
198     } else {
199       *fmt += "[ ";
200       for (size_t i = 0; i < at->getNumElements(); ++i) {
201         parse_type(B, args, fmt, at->getElementType(),
202                    B.CreateStructGEP(type, out, i), locals, is_writer);
203         *fmt += " ";
204       }
205       *fmt += "]";
206     }
207   } else if (isa<PointerType>(type)) {
208     *fmt += "0xl";
209     if (is_writer)
210       *fmt += "x";
211     else
212       *fmt += "i";
213   } else if (IntegerType *it = dyn_cast<IntegerType>(type)) {
214     if (is_writer)
215       *fmt += "0x";
216     if (it->getBitWidth() <= 8)
217       *fmt += "%hh";
218     else if (it->getBitWidth() <= 16)
219       *fmt += "%h";
220     else if (it->getBitWidth() <= 32)
221       *fmt += "%";
222     else
223       *fmt += "%l";
224     if (is_writer)
225       *fmt += "x";
226     else
227       *fmt += "i";
228     args->push_back(is_writer ? createLoad(B, out) : out);
229   }
230 }
231 
232 // make_reader generates a dynamic function in the instruction set of the host
233 // (not bpf) that is able to convert c-strings in the pretty-print format of
234 // make_writer back into binary representations. The encoding of the string
235 // takes the llvm ir structure format, which closely maps the c structure but
236 // not exactly (no support for unions for instance).
237 // The general algorithm is:
238 //  pod types (u8..u64)                <= %i
239 //  array types
240 //   u8[]  no nested quotes :(         <= "..."
241 //   !u8[]                             <= [ %i %i ... ]
242 //  struct types
243 //   struct { u8 a; u64 b; }           <= { %i %i }
244 //  nesting is supported
245 //   struct { struct { u8 a[]; }; }    <= { "" }
246 //   struct { struct { u64 a[]; }; }   <= { [ %i %i .. ] }
make_reader(Module * mod,Type * type)247 string BPFModule::make_reader(Module *mod, Type *type) {
248   auto fn_it = readers_.find(type);
249   if (fn_it != readers_.end())
250     return fn_it->second;
251 
252   // int read(const char *in, Type *out) {
253   //   int n = sscanf(in, "{ %i ... }", &out->field1, ...);
254   //   if (n != num_fields) return -1;
255   //   return 0;
256   // }
257 
258   IRBuilder<> B(*ctx_);
259 
260   FunctionType *sscanf_fn_type = FunctionType::get(
261       B.getInt32Ty(), {B.getInt8PtrTy(), B.getInt8PtrTy()}, /*isVarArg=*/true);
262   Function *sscanf_fn = mod->getFunction("sscanf");
263   if (!sscanf_fn) {
264     sscanf_fn = Function::Create(sscanf_fn_type, GlobalValue::ExternalLinkage,
265                                  "sscanf", mod);
266     sscanf_fn->setCallingConv(CallingConv::C);
267     sscanf_fn->addFnAttr(Attribute::NoUnwind);
268   }
269 
270   string name = "reader" + std::to_string(readers_.size());
271   vector<Type *> fn_args({B.getInt8PtrTy(), PointerType::getUnqual(type)});
272   FunctionType *fn_type = FunctionType::get(B.getInt32Ty(), fn_args, /*isVarArg=*/false);
273   Function *fn =
274       Function::Create(fn_type, GlobalValue::ExternalLinkage, name, mod);
275   auto arg_it = fn->arg_begin();
276   Argument *arg_in = &*arg_it;
277   ++arg_it;
278   arg_in->setName("in");
279   Argument *arg_out = &*arg_it;
280   ++arg_it;
281   arg_out->setName("out");
282 
283   BasicBlock *label_entry = BasicBlock::Create(*ctx_, "entry", fn);
284   B.SetInsertPoint(label_entry);
285 
286   Value *nread = B.CreateAlloca(B.getInt32Ty());
287   Value *sptr = B.CreateAlloca(B.getInt8PtrTy());
288   map<string, Value *> locals{{"nread", nread}, {"sptr", sptr}};
289   B.CreateStore(arg_in, sptr);
290   vector<Value *> args({nullptr, nullptr});
291   string fmt;
292   parse_type(B, &args, &fmt, type, arg_out, locals, false);
293 
294   if (0)
295     debug_printf(mod, B, "%p %p\n", vector<Value *>({arg_in, arg_out}));
296 
297   finish_sscanf(B, &args, &fmt, locals, true);
298 
299   B.CreateRet(B.getInt32(0));
300 
301   readers_[type] = name;
302   return name;
303 }
304 
305 // make_writer generates a dynamic function in the instruction set of the host
306 // (not bpf) that is able to pretty-print key/leaf entries as a c-string. The
307 // encoding of the string takes the llvm ir structure format, which closely maps
308 // the c structure but not exactly (no support for unions for instance).
309 // The general algorithm is:
310 //  pod types (u8..u64)                => 0x%x
311 //  array types
312 //   u8[]                              => "..."
313 //   !u8[]                             => [ 0x%x 0x%x ... ]
314 //  struct types
315 //   struct { u8 a; u64 b; }           => { 0x%x 0x%x }
316 //  nesting is supported
317 //   struct { struct { u8 a[]; }; }    => { "" }
318 //   struct { struct { u64 a[]; }; }   => { [ 0x%x 0x%x .. ] }
make_writer(Module * mod,Type * type)319 string BPFModule::make_writer(Module *mod, Type *type) {
320   auto fn_it = writers_.find(type);
321   if (fn_it != writers_.end())
322     return fn_it->second;
323 
324   // int write(int len, char *out, Type *in) {
325   //   return snprintf(out, len, "{ %i ... }", out->field1, ...);
326   // }
327 
328   IRBuilder<> B(*ctx_);
329 
330   string name = "writer" + std::to_string(writers_.size());
331   vector<Type *> fn_args({B.getInt8PtrTy(), B.getInt64Ty(), PointerType::getUnqual(type)});
332   FunctionType *fn_type = FunctionType::get(B.getInt32Ty(), fn_args, /*isVarArg=*/false);
333   Function *fn =
334       Function::Create(fn_type, GlobalValue::ExternalLinkage, name, mod);
335   auto arg_it = fn->arg_begin();
336   Argument *arg_out = &*arg_it;
337   ++arg_it;
338   arg_out->setName("out");
339   Argument *arg_len = &*arg_it;
340   ++arg_it;
341   arg_len->setName("len");
342   Argument *arg_in = &*arg_it;
343   ++arg_it;
344   arg_in->setName("in");
345 
346   BasicBlock *label_entry = BasicBlock::Create(*ctx_, "entry", fn);
347   B.SetInsertPoint(label_entry);
348 
349   map<string, Value *> locals{
350       {"nread", B.CreateAlloca(B.getInt64Ty())},
351   };
352   vector<Value *> args({arg_out, B.CreateZExt(arg_len, B.getInt64Ty()), nullptr});
353   string fmt;
354   parse_type(B, &args, &fmt, type, arg_in, locals, true);
355 
356   GlobalVariable *fmt_gvar = B.CreateGlobalString(fmt, "fmt");
357 
358   args[2] = createInBoundsGEP(B, fmt_gvar, vector<Value *>({B.getInt64(0), B.getInt64(0)}));
359 
360   if (0)
361     debug_printf(mod, B, "%d %p %p\n", vector<Value *>({arg_len, arg_out, arg_in}));
362 
363   vector<Type *> snprintf_fn_args({B.getInt8PtrTy(), B.getInt64Ty(), B.getInt8PtrTy()});
364   FunctionType *snprintf_fn_type = FunctionType::get(B.getInt32Ty(), snprintf_fn_args, /*isVarArg=*/true);
365   Function *snprintf_fn = mod->getFunction("snprintf");
366   if (!snprintf_fn)
367     snprintf_fn = Function::Create(snprintf_fn_type, GlobalValue::ExternalLinkage, "snprintf", mod);
368   snprintf_fn->setCallingConv(CallingConv::C);
369   snprintf_fn->addFnAttr(Attribute::NoUnwind);
370 
371   CallInst *call = B.CreateCall(snprintf_fn, args);
372   call->setTailCall(true);
373 
374   B.CreateRet(call);
375 
376   writers_[type] = name;
377   return name;
378 }
379 
finalize_rw(unique_ptr<Module> m)380 unique_ptr<ExecutionEngine> BPFModule::finalize_rw(unique_ptr<Module> m) {
381   Module *mod = &*m;
382 
383   run_pass_manager(*mod);
384 
385   string err;
386   EngineBuilder builder(move(m));
387   builder.setErrorStr(&err);
388 #if LLVM_VERSION_MAJOR <= 11
389   builder.setUseOrcMCJITReplacement(false);
390 #endif
391   auto engine = unique_ptr<ExecutionEngine>(builder.create());
392   if (!engine)
393     fprintf(stderr, "Could not create ExecutionEngine: %s\n", err.c_str());
394   return engine;
395 }
396 
annotate()397 int BPFModule::annotate() {
398   for (auto fn = mod_->getFunctionList().begin(); fn != mod_->getFunctionList().end(); ++fn)
399     if (!fn->hasFnAttribute(Attribute::NoInline))
400       fn->addFnAttr(Attribute::AlwaysInline);
401 
402   // separate module to hold the reader functions
403   auto m = ebpf::make_unique<Module>("sscanf", *ctx_);
404 
405   size_t id = 0;
406   Path path({id_});
407   for (auto it = ts_->lower_bound(path), up = ts_->upper_bound(path); it != up; ++it) {
408     TableDesc &table = it->second;
409     tables_.push_back(&it->second);
410     table_names_[table.name] = id++;
411     GlobalValue *gvar = mod_->getNamedValue(table.name);
412     if (!gvar) continue;
413 #if LLVM_VERSION_MAJOR >= 14
414     {
415       Type *t = gvar->getValueType();
416       StructType *st = dyn_cast<StructType>(t);
417 #else
418     if (PointerType *pt = dyn_cast<PointerType>(gvar->getType())) {
419       StructType *st = dyn_cast<StructType>(pt->getElementType());
420 #endif
421       if (st) {
422         if (st->getNumElements() < 2) continue;
423         Type *key_type = st->elements()[0];
424         Type *leaf_type = st->elements()[1];
425 
426         using std::placeholders::_1;
427         using std::placeholders::_2;
428         using std::placeholders::_3;
429         table.key_sscanf = std::bind(&BPFModule::sscanf, this,
430                                      make_reader(&*m, key_type), _1, _2);
431         table.leaf_sscanf = std::bind(&BPFModule::sscanf, this,
432                                       make_reader(&*m, leaf_type), _1, _2);
433         table.key_snprintf = std::bind(&BPFModule::snprintf, this,
434                                        make_writer(&*m, key_type), _1, _2, _3);
435         table.leaf_snprintf =
436             std::bind(&BPFModule::snprintf, this, make_writer(&*m, leaf_type),
437                       _1, _2, _3);
438       }
439     }
440   }
441 
442   rw_engine_ = finalize_rw(move(m));
443   if (!rw_engine_)
444     return -1;
445   return 0;
446 }
447 
448 StatusTuple BPFModule::sscanf(string fn_name, const char *str, void *val) {
449   if (!rw_engine_enabled_)
450     return StatusTuple(-1, "rw_engine not enabled");
451   auto fn =
452       (int (*)(const char *, void *))rw_engine_->getFunctionAddress(fn_name);
453   if (!fn)
454     return StatusTuple(-1, "sscanf not available");
455   int rc = fn(str, val);
456   if (rc < 0)
457     return StatusTuple(rc, "error in sscanf: %s", std::strerror(errno));
458   return StatusTuple(rc);
459 }
460 
461 StatusTuple BPFModule::snprintf(string fn_name, char *str, size_t sz,
462                                 const void *val) {
463   if (!rw_engine_enabled_)
464     return StatusTuple(-1, "rw_engine not enabled");
465   auto fn = (int (*)(char *, size_t,
466                      const void *))rw_engine_->getFunctionAddress(fn_name);
467   if (!fn)
468     return StatusTuple(-1, "snprintf not available");
469   int rc = fn(str, sz, val);
470   if (rc < 0)
471     return StatusTuple(rc, "error in snprintf: %s", std::strerror(errno));
472   if ((size_t)rc == sz)
473     return StatusTuple(-1, "buffer of size %zd too small", sz);
474   return StatusTuple::OK();
475 }
476 
477 } // namespace ebpf
478