1 /*
2 * Copyright (c) 2015 PLUMgrid, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include <map>
17 #include <string>
18 #include <vector>
19
20 #include <llvm/ExecutionEngine/MCJIT.h>
21 #include <llvm/IR/IRBuilder.h>
22 #include <llvm/Support/TargetSelect.h>
23
24 #include "common.h"
25 #include "bpf_module.h"
26 #include "table_storage.h"
27
28 namespace ebpf {
29
30 using std::map;
31 using std::move;
32 using std::string;
33 using std::unique_ptr;
34 using std::vector;
35 using namespace llvm;
36
// Reports whether the reader/writer (sscanf/snprintf) engine is available.
// This implementation unconditionally answers true.
bool bpf_module_rw_engine_enabled(void)
{
  constexpr bool kRwEngineAvailable = true;
  return kRwEngineAvailable;
}
40
initialize_rw_engine()41 void BPFModule::initialize_rw_engine() {
42 InitializeNativeTarget();
43 InitializeNativeTargetAsmPrinter();
44 }
45
cleanup_rw_engine()46 void BPFModule::cleanup_rw_engine() {
47 rw_engine_.reset();
48 }
49
createLoad(IRBuilder<> & B,Value * addr,bool isVolatile=false)50 static LoadInst *createLoad(IRBuilder<> &B, Value *addr, bool isVolatile = false)
51 {
52 #if LLVM_VERSION_MAJOR >= 15
53 if (isa<AllocaInst>(addr))
54 return B.CreateLoad(dyn_cast<AllocaInst>(addr)->getAllocatedType(), addr, isVolatile);
55 else
56 return B.CreateLoad(addr->getType(), addr, isVolatile);
57 #elif LLVM_VERSION_MAJOR >= 13
58 return B.CreateLoad(addr->getType()->getPointerElementType(), addr, isVolatile);
59 #else
60 return B.CreateLoad(addr, isVolatile);
61 #endif
62 }
63
createInBoundsGEP(IRBuilder<> & B,Value * ptr,ArrayRef<Value * > idxlist)64 static Value *createInBoundsGEP(IRBuilder<> &B, Value *ptr, ArrayRef<Value *>idxlist)
65 {
66 #if LLVM_VERSION_MAJOR >= 15
67 if (isa<GlobalValue>(ptr))
68 return B.CreateInBoundsGEP(dyn_cast<GlobalValue>(ptr)->getValueType(), ptr, idxlist);
69 else
70 return B.CreateInBoundsGEP(ptr->getType(), ptr, idxlist);
71 #elif LLVM_VERSION_MAJOR >= 13
72 return B.CreateInBoundsGEP(ptr->getType()->getScalarType()->getPointerElementType(),
73 ptr, idxlist);
74 #else
75 return B.CreateInBoundsGEP(ptr, idxlist);
76 #endif
77 }
78
debug_printf(Module * mod,IRBuilder<> & B,const string & fmt,vector<Value * > args)79 static void debug_printf(Module *mod, IRBuilder<> &B, const string &fmt, vector<Value *> args) {
80 GlobalVariable *fmt_gvar = B.CreateGlobalString(fmt, "fmt");
81 args.insert(args.begin(), createInBoundsGEP(B, fmt_gvar, vector<Value *>({B.getInt64(0), B.getInt64(0)})));
82 args.insert(args.begin(), B.getInt64((uintptr_t)stderr));
83 Function *fprintf_fn = mod->getFunction("fprintf");
84 if (!fprintf_fn) {
85 vector<Type *> fprintf_fn_args({B.getInt64Ty(), B.getInt8PtrTy()});
86 FunctionType *fprintf_fn_type = FunctionType::get(B.getInt32Ty(), fprintf_fn_args, /*isvarArg=*/true);
87 fprintf_fn = Function::Create(fprintf_fn_type, GlobalValue::ExternalLinkage, "fprintf", mod);
88 fprintf_fn->setCallingConv(CallingConv::C);
89 fprintf_fn->addFnAttr(Attribute::NoUnwind);
90 }
91 B.CreateCall(fprintf_fn, args);
92 }
93
finish_sscanf(IRBuilder<> & B,vector<Value * > * args,string * fmt,const map<string,Value * > & locals,bool exact_args)94 static void finish_sscanf(IRBuilder<> &B, vector<Value *> *args, string *fmt,
95 const map<string, Value *> &locals, bool exact_args) {
96 // fmt += "%n";
97 // int nread = 0;
98 // int n = sscanf(s, fmt, args..., &nread);
99 // if (n < 0) return -1;
100 // s = &s[nread];
101 Value *sptr = locals.at("sptr");
102 Value *nread = locals.at("nread");
103 Function *cur_fn = B.GetInsertBlock()->getParent();
104 Function *sscanf_fn = B.GetInsertBlock()->getModule()->getFunction("sscanf");
105 *fmt += "%n";
106 B.CreateStore(B.getInt32(0), nread);
107 GlobalVariable *fmt_gvar = B.CreateGlobalString(*fmt, "fmt");
108 (*args)[1] = createInBoundsGEP(B, fmt_gvar, {B.getInt64(0), B.getInt64(0)});
109 (*args)[0] = createLoad(B, sptr);
110 args->push_back(nread);
111 CallInst *call = B.CreateCall(sscanf_fn, *args);
112 call->setTailCall(true);
113
114 BasicBlock *label_true = BasicBlock::Create(B.getContext(), "", cur_fn);
115 BasicBlock *label_false = BasicBlock::Create(B.getContext(), "", cur_fn);
116
117 // exact_args means fail if don't consume exact number of "%" inputs
118 // exact_args is disabled for string parsing (empty case)
119 Value *cond = exact_args ? B.CreateICmpNE(call, B.getInt32(args->size() - 3))
120 : B.CreateICmpSLT(call, B.getInt32(0));
121 B.CreateCondBr(cond, label_true, label_false);
122
123 B.SetInsertPoint(label_true);
124 B.CreateRet(B.getInt32(-1));
125
126 B.SetInsertPoint(label_false);
127 // s = &s[nread];
128 B.CreateStore(
129 createInBoundsGEP(B, createLoad(B, sptr), {createLoad(B, nread, true)}), sptr);
130
131 args->resize(2);
132 fmt->clear();
133 }
134
135 // recursive helper to capture the arguments
parse_type(IRBuilder<> & B,vector<Value * > * args,string * fmt,Type * type,Value * out,const map<string,Value * > & locals,bool is_writer)136 static void parse_type(IRBuilder<> &B, vector<Value *> *args, string *fmt,
137 Type *type, Value *out,
138 const map<string, Value *> &locals, bool is_writer) {
139 if (StructType *st = dyn_cast<StructType>(type)) {
140 *fmt += "{ ";
141 unsigned idx = 0;
142 for (auto field : st->elements()) {
143 parse_type(B, args, fmt, field, B.CreateStructGEP(type, out, idx++),
144 locals, is_writer);
145 *fmt += " ";
146 }
147 *fmt += "}";
148 } else if (ArrayType *at = dyn_cast<ArrayType>(type)) {
149 if (at->getElementType() == B.getInt8Ty()) {
150 // treat i8[] as a char string instead of as an array of u8's
151 if (is_writer) {
152 *fmt += "\"%s\"";
153 args->push_back(out);
154 } else {
155 // When reading strings, scanf doesn't support empty "", so we need to
156 // break this up into multiple scanf calls. To understand it, let's take
157 // an example:
158 // struct Event {
159 // u32 a;
160 // struct {
161 // char x[64];
162 // int y;
163 // } b[2];
164 // u32 c;
165 // };
166 // The writer string would look like:
167 // "{ 0x%x [ { \"%s\" 0x%x } { \"%s\" 0x%x } ] 0x%x }"
168 // But the reader string needs to restart at each \"\".
169 // reader0(const char *s, struct Event *val) {
170 // int nread, rc;
171 // nread = 0;
172 // rc = sscanf(s, "{ %i [ { \"%n", &val->a, &nread);
173 // if (rc != 1) return -1;
174 // s += nread; nread = 0;
175 // rc = sscanf(s, "%[^\"]%n", &val->b[0].x, &nread);
176 // if (rc < 0) return -1;
177 // s += nread; nread = 0;
178 // rc = sscanf(s, "\" %i } { \"%n", &val->b[0].y, &nread);
179 // if (rc != 1) return -1;
180 // s += nread; nread = 0;
181 // rc = sscanf(s, "%[^\"]%n", &val->b[1].x, &nread);
182 // if (rc < 0) return -1;
183 // s += nread; nread = 0;
184 // rc = sscanf(s, "\" %i } ] %i }%n", &val->b[1].y, &val->c, &nread);
185 // if (rc != 2) return -1;
186 // s += nread; nread = 0;
187 // return 0;
188 // }
189 *fmt += "\"";
190 finish_sscanf(B, args, fmt, locals, true);
191
192 *fmt = "%[^\"]";
193 args->push_back(out);
194 finish_sscanf(B, args, fmt, locals, false);
195
196 *fmt = "\"";
197 }
198 } else {
199 *fmt += "[ ";
200 for (size_t i = 0; i < at->getNumElements(); ++i) {
201 parse_type(B, args, fmt, at->getElementType(),
202 B.CreateStructGEP(type, out, i), locals, is_writer);
203 *fmt += " ";
204 }
205 *fmt += "]";
206 }
207 } else if (isa<PointerType>(type)) {
208 *fmt += "0xl";
209 if (is_writer)
210 *fmt += "x";
211 else
212 *fmt += "i";
213 } else if (IntegerType *it = dyn_cast<IntegerType>(type)) {
214 if (is_writer)
215 *fmt += "0x";
216 if (it->getBitWidth() <= 8)
217 *fmt += "%hh";
218 else if (it->getBitWidth() <= 16)
219 *fmt += "%h";
220 else if (it->getBitWidth() <= 32)
221 *fmt += "%";
222 else
223 *fmt += "%l";
224 if (is_writer)
225 *fmt += "x";
226 else
227 *fmt += "i";
228 args->push_back(is_writer ? createLoad(B, out) : out);
229 }
230 }
231
232 // make_reader generates a dynamic function in the instruction set of the host
233 // (not bpf) that is able to convert c-strings in the pretty-print format of
234 // make_writer back into binary representations. The encoding of the string
235 // takes the llvm ir structure format, which closely maps the c structure but
236 // not exactly (no support for unions for instance).
237 // The general algorithm is:
238 // pod types (u8..u64) <= %i
239 // array types
240 // u8[] no nested quotes :( <= "..."
241 // !u8[] <= [ %i %i ... ]
242 // struct types
243 // struct { u8 a; u64 b; } <= { %i %i }
244 // nesting is supported
245 // struct { struct { u8 a[]; }; } <= { "" }
246 // struct { struct { u64 a[]; }; } <= { [ %i %i .. ] }
make_reader(Module * mod,Type * type)247 string BPFModule::make_reader(Module *mod, Type *type) {
248 auto fn_it = readers_.find(type);
249 if (fn_it != readers_.end())
250 return fn_it->second;
251
252 // int read(const char *in, Type *out) {
253 // int n = sscanf(in, "{ %i ... }", &out->field1, ...);
254 // if (n != num_fields) return -1;
255 // return 0;
256 // }
257
258 IRBuilder<> B(*ctx_);
259
260 FunctionType *sscanf_fn_type = FunctionType::get(
261 B.getInt32Ty(), {B.getInt8PtrTy(), B.getInt8PtrTy()}, /*isVarArg=*/true);
262 Function *sscanf_fn = mod->getFunction("sscanf");
263 if (!sscanf_fn) {
264 sscanf_fn = Function::Create(sscanf_fn_type, GlobalValue::ExternalLinkage,
265 "sscanf", mod);
266 sscanf_fn->setCallingConv(CallingConv::C);
267 sscanf_fn->addFnAttr(Attribute::NoUnwind);
268 }
269
270 string name = "reader" + std::to_string(readers_.size());
271 vector<Type *> fn_args({B.getInt8PtrTy(), PointerType::getUnqual(type)});
272 FunctionType *fn_type = FunctionType::get(B.getInt32Ty(), fn_args, /*isVarArg=*/false);
273 Function *fn =
274 Function::Create(fn_type, GlobalValue::ExternalLinkage, name, mod);
275 auto arg_it = fn->arg_begin();
276 Argument *arg_in = &*arg_it;
277 ++arg_it;
278 arg_in->setName("in");
279 Argument *arg_out = &*arg_it;
280 ++arg_it;
281 arg_out->setName("out");
282
283 BasicBlock *label_entry = BasicBlock::Create(*ctx_, "entry", fn);
284 B.SetInsertPoint(label_entry);
285
286 Value *nread = B.CreateAlloca(B.getInt32Ty());
287 Value *sptr = B.CreateAlloca(B.getInt8PtrTy());
288 map<string, Value *> locals{{"nread", nread}, {"sptr", sptr}};
289 B.CreateStore(arg_in, sptr);
290 vector<Value *> args({nullptr, nullptr});
291 string fmt;
292 parse_type(B, &args, &fmt, type, arg_out, locals, false);
293
294 if (0)
295 debug_printf(mod, B, "%p %p\n", vector<Value *>({arg_in, arg_out}));
296
297 finish_sscanf(B, &args, &fmt, locals, true);
298
299 B.CreateRet(B.getInt32(0));
300
301 readers_[type] = name;
302 return name;
303 }
304
305 // make_writer generates a dynamic function in the instruction set of the host
306 // (not bpf) that is able to pretty-print key/leaf entries as a c-string. The
307 // encoding of the string takes the llvm ir structure format, which closely maps
308 // the c structure but not exactly (no support for unions for instance).
309 // The general algorithm is:
310 // pod types (u8..u64) => 0x%x
311 // array types
312 // u8[] => "..."
313 // !u8[] => [ 0x%x 0x%x ... ]
314 // struct types
315 // struct { u8 a; u64 b; } => { 0x%x 0x%x }
316 // nesting is supported
317 // struct { struct { u8 a[]; }; } => { "" }
318 // struct { struct { u64 a[]; }; } => { [ 0x%x 0x%x .. ] }
make_writer(Module * mod,Type * type)319 string BPFModule::make_writer(Module *mod, Type *type) {
320 auto fn_it = writers_.find(type);
321 if (fn_it != writers_.end())
322 return fn_it->second;
323
324 // int write(int len, char *out, Type *in) {
325 // return snprintf(out, len, "{ %i ... }", out->field1, ...);
326 // }
327
328 IRBuilder<> B(*ctx_);
329
330 string name = "writer" + std::to_string(writers_.size());
331 vector<Type *> fn_args({B.getInt8PtrTy(), B.getInt64Ty(), PointerType::getUnqual(type)});
332 FunctionType *fn_type = FunctionType::get(B.getInt32Ty(), fn_args, /*isVarArg=*/false);
333 Function *fn =
334 Function::Create(fn_type, GlobalValue::ExternalLinkage, name, mod);
335 auto arg_it = fn->arg_begin();
336 Argument *arg_out = &*arg_it;
337 ++arg_it;
338 arg_out->setName("out");
339 Argument *arg_len = &*arg_it;
340 ++arg_it;
341 arg_len->setName("len");
342 Argument *arg_in = &*arg_it;
343 ++arg_it;
344 arg_in->setName("in");
345
346 BasicBlock *label_entry = BasicBlock::Create(*ctx_, "entry", fn);
347 B.SetInsertPoint(label_entry);
348
349 map<string, Value *> locals{
350 {"nread", B.CreateAlloca(B.getInt64Ty())},
351 };
352 vector<Value *> args({arg_out, B.CreateZExt(arg_len, B.getInt64Ty()), nullptr});
353 string fmt;
354 parse_type(B, &args, &fmt, type, arg_in, locals, true);
355
356 GlobalVariable *fmt_gvar = B.CreateGlobalString(fmt, "fmt");
357
358 args[2] = createInBoundsGEP(B, fmt_gvar, vector<Value *>({B.getInt64(0), B.getInt64(0)}));
359
360 if (0)
361 debug_printf(mod, B, "%d %p %p\n", vector<Value *>({arg_len, arg_out, arg_in}));
362
363 vector<Type *> snprintf_fn_args({B.getInt8PtrTy(), B.getInt64Ty(), B.getInt8PtrTy()});
364 FunctionType *snprintf_fn_type = FunctionType::get(B.getInt32Ty(), snprintf_fn_args, /*isVarArg=*/true);
365 Function *snprintf_fn = mod->getFunction("snprintf");
366 if (!snprintf_fn)
367 snprintf_fn = Function::Create(snprintf_fn_type, GlobalValue::ExternalLinkage, "snprintf", mod);
368 snprintf_fn->setCallingConv(CallingConv::C);
369 snprintf_fn->addFnAttr(Attribute::NoUnwind);
370
371 CallInst *call = B.CreateCall(snprintf_fn, args);
372 call->setTailCall(true);
373
374 B.CreateRet(call);
375
376 writers_[type] = name;
377 return name;
378 }
379
finalize_rw(unique_ptr<Module> m)380 unique_ptr<ExecutionEngine> BPFModule::finalize_rw(unique_ptr<Module> m) {
381 Module *mod = &*m;
382
383 run_pass_manager(*mod);
384
385 string err;
386 EngineBuilder builder(move(m));
387 builder.setErrorStr(&err);
388 #if LLVM_VERSION_MAJOR <= 11
389 builder.setUseOrcMCJITReplacement(false);
390 #endif
391 auto engine = unique_ptr<ExecutionEngine>(builder.create());
392 if (!engine)
393 fprintf(stderr, "Could not create ExecutionEngine: %s\n", err.c_str());
394 return engine;
395 }
396
annotate()397 int BPFModule::annotate() {
398 for (auto fn = mod_->getFunctionList().begin(); fn != mod_->getFunctionList().end(); ++fn)
399 if (!fn->hasFnAttribute(Attribute::NoInline))
400 fn->addFnAttr(Attribute::AlwaysInline);
401
402 // separate module to hold the reader functions
403 auto m = ebpf::make_unique<Module>("sscanf", *ctx_);
404
405 size_t id = 0;
406 Path path({id_});
407 for (auto it = ts_->lower_bound(path), up = ts_->upper_bound(path); it != up; ++it) {
408 TableDesc &table = it->second;
409 tables_.push_back(&it->second);
410 table_names_[table.name] = id++;
411 GlobalValue *gvar = mod_->getNamedValue(table.name);
412 if (!gvar) continue;
413 #if LLVM_VERSION_MAJOR >= 14
414 {
415 Type *t = gvar->getValueType();
416 StructType *st = dyn_cast<StructType>(t);
417 #else
418 if (PointerType *pt = dyn_cast<PointerType>(gvar->getType())) {
419 StructType *st = dyn_cast<StructType>(pt->getElementType());
420 #endif
421 if (st) {
422 if (st->getNumElements() < 2) continue;
423 Type *key_type = st->elements()[0];
424 Type *leaf_type = st->elements()[1];
425
426 using std::placeholders::_1;
427 using std::placeholders::_2;
428 using std::placeholders::_3;
429 table.key_sscanf = std::bind(&BPFModule::sscanf, this,
430 make_reader(&*m, key_type), _1, _2);
431 table.leaf_sscanf = std::bind(&BPFModule::sscanf, this,
432 make_reader(&*m, leaf_type), _1, _2);
433 table.key_snprintf = std::bind(&BPFModule::snprintf, this,
434 make_writer(&*m, key_type), _1, _2, _3);
435 table.leaf_snprintf =
436 std::bind(&BPFModule::snprintf, this, make_writer(&*m, leaf_type),
437 _1, _2, _3);
438 }
439 }
440 }
441
442 rw_engine_ = finalize_rw(move(m));
443 if (!rw_engine_)
444 return -1;
445 return 0;
446 }
447
448 StatusTuple BPFModule::sscanf(string fn_name, const char *str, void *val) {
449 if (!rw_engine_enabled_)
450 return StatusTuple(-1, "rw_engine not enabled");
451 auto fn =
452 (int (*)(const char *, void *))rw_engine_->getFunctionAddress(fn_name);
453 if (!fn)
454 return StatusTuple(-1, "sscanf not available");
455 int rc = fn(str, val);
456 if (rc < 0)
457 return StatusTuple(rc, "error in sscanf: %s", std::strerror(errno));
458 return StatusTuple(rc);
459 }
460
461 StatusTuple BPFModule::snprintf(string fn_name, char *str, size_t sz,
462 const void *val) {
463 if (!rw_engine_enabled_)
464 return StatusTuple(-1, "rw_engine not enabled");
465 auto fn = (int (*)(char *, size_t,
466 const void *))rw_engine_->getFunctionAddress(fn_name);
467 if (!fn)
468 return StatusTuple(-1, "snprintf not available");
469 int rc = fn(str, sz, val);
470 if (rc < 0)
471 return StatusTuple(rc, "error in snprintf: %s", std::strerror(errno));
472 if ((size_t)rc == sz)
473 return StatusTuple(-1, "buffer of size %zd too small", sz);
474 return StatusTuple::OK();
475 }
476
477 } // namespace ebpf
478