// SPDX-License-Identifier: GPL-2.0-only
/*
 * BPF JIT compiler for LoongArch
 *
 * Copyright (C) 2022 Loongson Technology Corporation Limited
 */
#include "bpf_jit.h"

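/*
 * REG_TCC carries the tail call count (TCC) into a BPF program; when the
 * program also makes calls, the prologue preserves it in the callee-saved
 * TCC_SAVED register.
 */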
#define REG_TCC		LOONGARCH_GPR_A6
#define TCC_SAVED	LOONGARCH_GPR_S5

#define SAVE_RA		BIT(0)
#define SAVE_TCC	BIT(1)

static const int regmap[] = {
	/* return value from in-kernel function, and exit value for eBPF program */
	[BPF_REG_0] = LOONGARCH_GPR_A5,
	/* arguments from eBPF program to in-kernel function */
	[BPF_REG_1] = LOONGARCH_GPR_A0,
	[BPF_REG_2] = LOONGARCH_GPR_A1,
	[BPF_REG_3] = LOONGARCH_GPR_A2,
	[BPF_REG_4] = LOONGARCH_GPR_A3,
	[BPF_REG_5] = LOONGARCH_GPR_A4,
	/* callee saved registers that in-kernel function will preserve */
	[BPF_REG_6] = LOONGARCH_GPR_S0,
	[BPF_REG_7] = LOONGARCH_GPR_S1,
	[BPF_REG_8] = LOONGARCH_GPR_S2,
	[BPF_REG_9] = LOONGARCH_GPR_S3,
	/* read-only frame pointer to access stack */
	[BPF_REG_FP] = LOONGARCH_GPR_S4,
	/* temporary register for blinding constants */
	[BPF_REG_AX] = LOONGARCH_GPR_T0,
};

static void mark_call(struct jit_ctx *ctx)
{
	ctx->flags |= SAVE_RA;
}

static void mark_tail_call(struct jit_ctx *ctx)
{
	ctx->flags |= SAVE_TCC;
}

static bool seen_call(struct jit_ctx *ctx)
{
	return (ctx->flags & SAVE_RA);
}

static bool seen_tail_call(struct jit_ctx *ctx)
{
	return (ctx->flags & SAVE_TCC);
}

static u8 tail_call_reg(struct jit_ctx *ctx)
{
	if (seen_call(ctx))
		return TCC_SAVED;

	return REG_TCC;
}

/*
 * eBPF prog stack layout:
 *
 *                                        high
 * original $sp ------------> +-------------------------+ <--LOONGARCH_GPR_FP
 *                            |           $ra           |
 *                            +-------------------------+
 *                            |           $fp           |
 *                            +-------------------------+
 *                            |           $s0           |
 *                            +-------------------------+
 *                            |           $s1           |
 *                            +-------------------------+
 *                            |           $s2           |
 *                            +-------------------------+
 *                            |           $s3           |
 *                            +-------------------------+
 *                            |           $s4           |
 *                            +-------------------------+
 *                            |           $s5           |
 *                            +-------------------------+ <--BPF_REG_FP
 *                            |  prog->aux->stack_depth |
 *                            |        (optional)       |
 * current $sp -------------> +-------------------------+
 *                                        low
 */
static void build_prologue(struct jit_ctx *ctx)
{
	int stack_adjust = 0, store_offset, bpf_stack_adjust;

	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);

	/* To store ra, fp, s0, s1, s2, s3, s4 and s5. */
	stack_adjust += sizeof(long) * 8;

	stack_adjust = round_up(stack_adjust, 16);
	stack_adjust += bpf_stack_adjust;

	/*
	 * First instruction initializes the tail call count (TCC).
	 * On tail call we skip this instruction, and the TCC is
	 * passed in REG_TCC from the caller.
	 */
	emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);

	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust);

	store_offset = stack_adjust - sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset);

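	/* Set up the LoongArch frame pointer: $fp = the original $sp (before the adjustment above) */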
	emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust);

	if (bpf_stack_adjust)
		emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust);

	/*
	 * Program contains calls and tail calls, so REG_TCC needs
	 * to be saved across calls.
	 */
	if (seen_tail_call(ctx) && seen_call(ctx))
		move_reg(ctx, TCC_SAVED, REG_TCC);
	else
		emit_insn(ctx, nop);

	ctx->stack_size = stack_adjust;
}

static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call)
{
	int stack_adjust = ctx->stack_size;
	int load_offset;

	load_offset = stack_adjust - sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset);

	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust);

	if (!is_tail_call) {
		/* Set return value */
		emit_insn(ctx, addiw, LOONGARCH_GPR_A0, regmap[BPF_REG_0], 0);
		/* Return to the caller */
		emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_RA, 0);
	} else {
		/*
		 * Call the next bpf prog and skip the first instruction
		 * of TCC initialization.
		 */
		emit_insn(ctx, jirl, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_T3, 1);
	}
}

static void build_epilogue(struct jit_ctx *ctx)
{
	__build_epilogue(ctx, false);
}

bool bpf_jit_supports_kfunc_call(void)
{
	return true;
}

bool bpf_jit_supports_far_kfunc_call(void)
{
	return true;
}

/* initialized on the first pass of build_body() */
static int out_offset = -1;
static int emit_bpf_tail_call(struct jit_ctx *ctx)
{
	int off;
	u8 tcc = tail_call_reg(ctx);
	u8 a1 = LOONGARCH_GPR_A1;
	u8 a2 = LOONGARCH_GPR_A2;
	u8 t1 = LOONGARCH_GPR_T1;
	u8 t2 = LOONGARCH_GPR_T2;
	u8 t3 = LOONGARCH_GPR_T3;
	const int idx0 = ctx->idx;

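/* cur_offset and jmp_offset are measured in emitted instructions, relative to the start of this helper */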
#define cur_offset (ctx->idx - idx0)
#define jmp_offset (out_offset - (cur_offset))

	/*
	 * a0: &ctx
	 * a1: &array
	 * a2: index
	 *
	 * if (index >= array->map.max_entries)
	 *	 goto out;
	 */
	off = offsetof(struct bpf_array, map.max_entries);
	emit_insn(ctx, ldwu, t1, a1, off);
	/* bgeu $a2, $t1, jmp_offset */
	if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset) < 0)
		goto toofar;

	/*
	 * if (--TCC < 0)
	 *	 goto out;
	 */
	emit_insn(ctx, addid, REG_TCC, tcc, -1);
	if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
		goto toofar;

	/*
	 * prog = array->ptrs[index];
	 * if (!prog)
	 *	 goto out;
	 */
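	/* t2 = &array + index scaled by the pointer size; the ptrs[] offset is folded into the load below */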
	emit_insn(ctx, alsld, t2, a2, a1, 2);
	off = offsetof(struct bpf_array, ptrs);
	emit_insn(ctx, ldd, t2, t2, off);
	/* beq $t2, $zero, jmp_offset */
	if (emit_tailcall_jmp(ctx, BPF_JEQ, t2, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
		goto toofar;

	/* goto *(prog->bpf_func + 4); */
	off = offsetof(struct bpf_prog, bpf_func);
	emit_insn(ctx, ldd, t3, t2, off);
	__build_epilogue(ctx, true);

	/* out: */
	if (out_offset == -1)
		out_offset = cur_offset;
	if (cur_offset != out_offset) {
		pr_err_once("tail_call out_offset = %d, expected %d!\n",
			    cur_offset, out_offset);
		return -1;
	}

	return 0;

toofar:
	pr_info_once("tail_call: jump too far\n");
	return -1;
#undef cur_offset
#undef jmp_offset
}

static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	const u8 t1 = LOONGARCH_GPR_T1;
	const u8 t2 = LOONGARCH_GPR_T2;
	const u8 t3 = LOONGARCH_GPR_T3;
	const u8 r0 = regmap[BPF_REG_0];
	const u8 src = regmap[insn->src_reg];
	const u8 dst = regmap[insn->dst_reg];
	const s16 off = insn->off;
	const s32 imm = insn->imm;
	const bool isdw = BPF_SIZE(insn->code) == BPF_DW;

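	/* t1 = dst + off; t3 keeps a copy of src for the fetch/xchg forms, which write the old value back into src */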
	move_imm(ctx, t1, off, false);
	emit_insn(ctx, addd, t1, dst, t1);
	move_reg(ctx, t3, src);

	switch (imm) {
	/* lock *(size *)(dst + off) <op>= src */
	case BPF_ADD:
		if (isdw)
			emit_insn(ctx, amaddd, t2, t1, src);
		else
			emit_insn(ctx, amaddw, t2, t1, src);
		break;
	case BPF_AND:
		if (isdw)
			emit_insn(ctx, amandd, t2, t1, src);
		else
			emit_insn(ctx, amandw, t2, t1, src);
		break;
	case BPF_OR:
		if (isdw)
			emit_insn(ctx, amord, t2, t1, src);
		else
			emit_insn(ctx, amorw, t2, t1, src);
		break;
	case BPF_XOR:
		if (isdw)
			emit_insn(ctx, amxord, t2, t1, src);
		else
			emit_insn(ctx, amxorw, t2, t1, src);
		break;
	/* src = atomic_fetch_<op>(dst + off, src) */
	case BPF_ADD | BPF_FETCH:
		if (isdw) {
			emit_insn(ctx, amaddd, src, t1, t3);
		} else {
			emit_insn(ctx, amaddw, src, t1, t3);
			emit_zext_32(ctx, src, true);
		}
		break;
	case BPF_AND | BPF_FETCH:
		if (isdw) {
			emit_insn(ctx, amandd, src, t1, t3);
		} else {
			emit_insn(ctx, amandw, src, t1, t3);
			emit_zext_32(ctx, src, true);
		}
		break;
	case BPF_OR | BPF_FETCH:
		if (isdw) {
			emit_insn(ctx, amord, src, t1, t3);
		} else {
			emit_insn(ctx, amorw, src, t1, t3);
			emit_zext_32(ctx, src, true);
		}
		break;
	case BPF_XOR | BPF_FETCH:
		if (isdw) {
			emit_insn(ctx, amxord, src, t1, t3);
		} else {
			emit_insn(ctx, amxorw, src, t1, t3);
			emit_zext_32(ctx, src, true);
		}
		break;
	/* src = atomic_xchg(dst + off, src); */
	case BPF_XCHG:
		if (isdw) {
			emit_insn(ctx, amswapd, src, t1, t3);
		} else {
			emit_insn(ctx, amswapw, src, t1, t3);
			emit_zext_32(ctx, src, true);
		}
		break;
	/* r0 = atomic_cmpxchg(dst + off, r0, src); */
	case BPF_CMPXCHG:
		move_reg(ctx, t2, r0);
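		/* LL/SC loop: load-linked the old value into r0, store src only if it equals the expected value in t2, retry on SC failure */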
		if (isdw) {
			emit_insn(ctx, lld, r0, t1, 0);
			emit_insn(ctx, bne, t2, r0, 4);
			move_reg(ctx, t3, src);
			emit_insn(ctx, scd, t3, t1, 0);
			emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -4);
		} else {
			emit_insn(ctx, llw, r0, t1, 0);
			emit_zext_32(ctx, t2, true);
			emit_zext_32(ctx, r0, true);
			emit_insn(ctx, bne, t2, r0, 4);
			move_reg(ctx, t3, src);
			emit_insn(ctx, scw, t3, t1, 0);
			emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -6);
			emit_zext_32(ctx, r0, true);
		}
		break;
	}
}

static bool is_signed_bpf_cond(u8 cond)
{
	return cond == BPF_JSGT || cond == BPF_JSLT ||
	       cond == BPF_JSGE || cond == BPF_JSLE;
}

#define BPF_FIXUP_REG_MASK	GENMASK(31, 27)
#define BPF_FIXUP_OFFSET_MASK	GENMASK(26, 0)

bool ex_handler_bpf(const struct exception_table_entry *ex,
		    struct pt_regs *regs)
{
	int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);

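	/* Zero the destination register and resume at the instruction after the faulting load */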
	regs->regs[dst_reg] = 0;
	regs->csr_era = (unsigned long)&ex->fixup - offset;

	return true;
}

/* For accesses to BTF pointers, add an entry to the exception table */
static int add_exception_handler(const struct bpf_insn *insn,
				 struct jit_ctx *ctx,
				 int dst_reg)
{
	unsigned long pc;
	off_t offset;
	struct exception_table_entry *ex;

	if (!ctx->image || !ctx->prog->aux->extable)
		return 0;

	if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
	    BPF_MODE(insn->code) != BPF_PROBE_MEMSX)
		return 0;

	if (WARN_ON_ONCE(ctx->num_exentries >= ctx->prog->aux->num_exentries))
		return -EINVAL;

	ex = &ctx->prog->aux->extable[ctx->num_exentries];
	pc = (unsigned long)&ctx->image[ctx->idx - 1];

	offset = pc - (long)&ex->insn;
	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
		return -ERANGE;

	ex->insn = offset;

	/*
	 * Since the extable follows the program, the fixup offset is always
	 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
	 * to keep things simple, and put the destination register in the upper
	 * bits. We don't need to worry about buildtime or runtime sort
	 * modifying the upper bits because the table is already sorted, and
	 * isn't part of the main exception table.
	 */
	offset = (long)&ex->fixup - (pc + LOONGARCH_INSN_SIZE);
	if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset))
		return -ERANGE;

	ex->type = EX_TYPE_BPF;
	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) | FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);

	ctx->num_exentries++;

	return 0;
}

static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass)
{
	u8 tm = -1;
	u64 func_addr;
	bool func_addr_fixed, sign_extend;
	int i = insn - ctx->prog->insnsi;
	int ret, jmp_offset;
	const u8 code = insn->code;
	const u8 cond = BPF_OP(code);
	const u8 t1 = LOONGARCH_GPR_T1;
	const u8 t2 = LOONGARCH_GPR_T2;
	const u8 src = regmap[insn->src_reg];
	const u8 dst = regmap[insn->dst_reg];
	const s16 off = insn->off;
	const s32 imm = insn->imm;
	const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || BPF_CLASS(insn->code) == BPF_JMP32;

	switch (code) {
	/* dst = src */
	case BPF_ALU | BPF_MOV | BPF_X:
	case BPF_ALU64 | BPF_MOV | BPF_X:
		switch (off) {
		case 0:
			move_reg(ctx, dst, src);
			emit_zext_32(ctx, dst, is32);
			break;
		case 8:
			move_reg(ctx, t1, src);
			emit_insn(ctx, extwb, dst, t1);
			emit_zext_32(ctx, dst, is32);
			break;
		case 16:
			move_reg(ctx, t1, src);
			emit_insn(ctx, extwh, dst, t1);
			emit_zext_32(ctx, dst, is32);
			break;
		case 32:
			emit_insn(ctx, addw, dst, src, LOONGARCH_GPR_ZERO);
			break;
		}
		break;

	/* dst = imm */
	case BPF_ALU | BPF_MOV | BPF_K:
	case BPF_ALU64 | BPF_MOV | BPF_K:
		move_imm(ctx, dst, imm, is32);
		break;

	/* dst = dst + src */
	case BPF_ALU | BPF_ADD | BPF_X:
	case BPF_ALU64 | BPF_ADD | BPF_X:
		emit_insn(ctx, addd, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst + imm */
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU64 | BPF_ADD | BPF_K:
		if (is_signed_imm12(imm)) {
			emit_insn(ctx, addid, dst, dst, imm);
		} else {
			move_imm(ctx, t1, imm, is32);
			emit_insn(ctx, addd, dst, dst, t1);
		}
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst - src */
	case BPF_ALU | BPF_SUB | BPF_X:
	case BPF_ALU64 | BPF_SUB | BPF_X:
		emit_insn(ctx, subd, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst - imm */
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU64 | BPF_SUB | BPF_K:
		if (is_signed_imm12(-imm)) {
			emit_insn(ctx, addid, dst, dst, -imm);
		} else {
			move_imm(ctx, t1, imm, is32);
			emit_insn(ctx, subd, dst, dst, t1);
		}
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst * src */
	case BPF_ALU | BPF_MUL | BPF_X:
	case BPF_ALU64 | BPF_MUL | BPF_X:
		emit_insn(ctx, muld, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst * imm */
	case BPF_ALU | BPF_MUL | BPF_K:
	case BPF_ALU64 | BPF_MUL | BPF_K:
		move_imm(ctx, t1, imm, is32);
		emit_insn(ctx, muld, dst, dst, t1);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst / src */
	case BPF_ALU | BPF_DIV | BPF_X:
	case BPF_ALU64 | BPF_DIV | BPF_X:
		if (!off) {
			emit_zext_32(ctx, dst, is32);
			move_reg(ctx, t1, src);
			emit_zext_32(ctx, t1, is32);
			emit_insn(ctx, divdu, dst, dst, t1);
			emit_zext_32(ctx, dst, is32);
		} else {
			emit_sext_32(ctx, dst, is32);
			move_reg(ctx, t1, src);
			emit_sext_32(ctx, t1, is32);
			emit_insn(ctx, divd, dst, dst, t1);
			emit_sext_32(ctx, dst, is32);
		}
		break;

	/* dst = dst / imm */
	case BPF_ALU | BPF_DIV | BPF_K:
	case BPF_ALU64 | BPF_DIV | BPF_K:
		if (!off) {
			move_imm(ctx, t1, imm, is32);
			emit_zext_32(ctx, dst, is32);
			emit_insn(ctx, divdu, dst, dst, t1);
			emit_zext_32(ctx, dst, is32);
		} else {
			move_imm(ctx, t1, imm, false);
			emit_sext_32(ctx, t1, is32);
			emit_sext_32(ctx, dst, is32);
			emit_insn(ctx, divd, dst, dst, t1);
			emit_sext_32(ctx, dst, is32);
		}
		break;

	/* dst = dst % src */
	case BPF_ALU | BPF_MOD | BPF_X:
	case BPF_ALU64 | BPF_MOD | BPF_X:
		if (!off) {
			emit_zext_32(ctx, dst, is32);
			move_reg(ctx, t1, src);
			emit_zext_32(ctx, t1, is32);
			emit_insn(ctx, moddu, dst, dst, t1);
			emit_zext_32(ctx, dst, is32);
		} else {
			emit_sext_32(ctx, dst, is32);
			move_reg(ctx, t1, src);
			emit_sext_32(ctx, t1, is32);
			emit_insn(ctx, modd, dst, dst, t1);
			emit_sext_32(ctx, dst, is32);
		}
		break;

	/* dst = dst % imm */
	case BPF_ALU | BPF_MOD | BPF_K:
	case BPF_ALU64 | BPF_MOD | BPF_K:
		if (!off) {
			move_imm(ctx, t1, imm, is32);
			emit_zext_32(ctx, dst, is32);
			emit_insn(ctx, moddu, dst, dst, t1);
			emit_zext_32(ctx, dst, is32);
		} else {
			move_imm(ctx, t1, imm, false);
			emit_sext_32(ctx, t1, is32);
			emit_sext_32(ctx, dst, is32);
			emit_insn(ctx, modd, dst, dst, t1);
			emit_sext_32(ctx, dst, is32);
		}
		break;

	/* dst = -dst */
	case BPF_ALU | BPF_NEG:
	case BPF_ALU64 | BPF_NEG:
		move_imm(ctx, t1, imm, is32);
		emit_insn(ctx, subd, dst, LOONGARCH_GPR_ZERO, dst);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst & src */
	case BPF_ALU | BPF_AND | BPF_X:
	case BPF_ALU64 | BPF_AND | BPF_X:
		emit_insn(ctx, and, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst & imm */
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU64 | BPF_AND | BPF_K:
		if (is_unsigned_imm12(imm)) {
			emit_insn(ctx, andi, dst, dst, imm);
		} else {
			move_imm(ctx, t1, imm, is32);
			emit_insn(ctx, and, dst, dst, t1);
		}
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst | src */
	case BPF_ALU | BPF_OR | BPF_X:
	case BPF_ALU64 | BPF_OR | BPF_X:
		emit_insn(ctx, or, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst | imm */
	case BPF_ALU | BPF_OR | BPF_K:
	case BPF_ALU64 | BPF_OR | BPF_K:
		if (is_unsigned_imm12(imm)) {
			emit_insn(ctx, ori, dst, dst, imm);
		} else {
			move_imm(ctx, t1, imm, is32);
			emit_insn(ctx, or, dst, dst, t1);
		}
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst ^ src */
	case BPF_ALU | BPF_XOR | BPF_X:
	case BPF_ALU64 | BPF_XOR | BPF_X:
		emit_insn(ctx, xor, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst ^ imm */
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_K:
		if (is_unsigned_imm12(imm)) {
			emit_insn(ctx, xori, dst, dst, imm);
		} else {
			move_imm(ctx, t1, imm, is32);
			emit_insn(ctx, xor, dst, dst, t1);
		}
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst << src (logical) */
	case BPF_ALU | BPF_LSH | BPF_X:
		emit_insn(ctx, sllw, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	case BPF_ALU64 | BPF_LSH | BPF_X:
		emit_insn(ctx, slld, dst, dst, src);
		break;

	/* dst = dst << imm (logical) */
	case BPF_ALU | BPF_LSH | BPF_K:
		emit_insn(ctx, slliw, dst, dst, imm);
		emit_zext_32(ctx, dst, is32);
		break;

	case BPF_ALU64 | BPF_LSH | BPF_K:
		emit_insn(ctx, sllid, dst, dst, imm);
		break;

	/* dst = dst >> src (logical) */
	case BPF_ALU | BPF_RSH | BPF_X:
		emit_insn(ctx, srlw, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	case BPF_ALU64 | BPF_RSH | BPF_X:
		emit_insn(ctx, srld, dst, dst, src);
		break;

	/* dst = dst >> imm (logical) */
	case BPF_ALU | BPF_RSH | BPF_K:
		emit_insn(ctx, srliw, dst, dst, imm);
		emit_zext_32(ctx, dst, is32);
		break;

	case BPF_ALU64 | BPF_RSH | BPF_K:
		emit_insn(ctx, srlid, dst, dst, imm);
		break;

	/* dst = dst >> src (arithmetic) */
	case BPF_ALU | BPF_ARSH | BPF_X:
		emit_insn(ctx, sraw, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	case BPF_ALU64 | BPF_ARSH | BPF_X:
		emit_insn(ctx, srad, dst, dst, src);
		break;

	/* dst = dst >> imm (arithmetic) */
	case BPF_ALU | BPF_ARSH | BPF_K:
		emit_insn(ctx, sraiw, dst, dst, imm);
		emit_zext_32(ctx, dst, is32);
		break;

	case BPF_ALU64 | BPF_ARSH | BPF_K:
		emit_insn(ctx, sraid, dst, dst, imm);
		break;

	/* dst = BSWAP##imm(dst) */
	case BPF_ALU | BPF_END | BPF_FROM_LE:
		switch (imm) {
		case 16:
			/* zero-extend 16 bits into 64 bits */
			emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
			break;
		case 32:
			/* zero-extend 32 bits into 64 bits */
			emit_zext_32(ctx, dst, is32);
			break;
		case 64:
			/* do nothing */
			break;
		}
		break;

	case BPF_ALU | BPF_END | BPF_FROM_BE:
	case BPF_ALU64 | BPF_END | BPF_FROM_LE:
		switch (imm) {
		case 16:
			emit_insn(ctx, revb2h, dst, dst);
			/* zero-extend 16 bits into 64 bits */
			emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
			break;
		case 32:
			emit_insn(ctx, revb2w, dst, dst);
			/* clear the upper 32 bits */
			emit_zext_32(ctx, dst, true);
			break;
		case 64:
			emit_insn(ctx, revbd, dst, dst);
			break;
		}
		break;

	/* PC += off if dst cond src */
	case BPF_JMP | BPF_JEQ | BPF_X:
	case BPF_JMP | BPF_JNE | BPF_X:
	case BPF_JMP | BPF_JGT | BPF_X:
	case BPF_JMP | BPF_JGE | BPF_X:
	case BPF_JMP | BPF_JLT | BPF_X:
	case BPF_JMP | BPF_JLE | BPF_X:
	case BPF_JMP | BPF_JSGT | BPF_X:
	case BPF_JMP | BPF_JSGE | BPF_X:
	case BPF_JMP | BPF_JSLT | BPF_X:
	case BPF_JMP | BPF_JSLE | BPF_X:
	case BPF_JMP32 | BPF_JEQ | BPF_X:
	case BPF_JMP32 | BPF_JNE | BPF_X:
	case BPF_JMP32 | BPF_JGT | BPF_X:
	case BPF_JMP32 | BPF_JGE | BPF_X:
	case BPF_JMP32 | BPF_JLT | BPF_X:
	case BPF_JMP32 | BPF_JLE | BPF_X:
	case BPF_JMP32 | BPF_JSGT | BPF_X:
	case BPF_JMP32 | BPF_JSGE | BPF_X:
	case BPF_JMP32 | BPF_JSLT | BPF_X:
	case BPF_JMP32 | BPF_JSLE | BPF_X:
		jmp_offset = bpf2la_offset(i, off, ctx);
		move_reg(ctx, t1, dst);
		move_reg(ctx, t2, src);
		if (is_signed_bpf_cond(BPF_OP(code))) {
			emit_sext_32(ctx, t1, is32);
			emit_sext_32(ctx, t2, is32);
		} else {
			emit_zext_32(ctx, t1, is32);
			emit_zext_32(ctx, t2, is32);
		}
		if (emit_cond_jmp(ctx, cond, t1, t2, jmp_offset) < 0)
			goto toofar;
		break;

	/* PC += off if dst cond imm */
	case BPF_JMP | BPF_JEQ | BPF_K:
	case BPF_JMP | BPF_JNE | BPF_K:
	case BPF_JMP | BPF_JGT | BPF_K:
	case BPF_JMP | BPF_JGE | BPF_K:
	case BPF_JMP | BPF_JLT | BPF_K:
	case BPF_JMP | BPF_JLE | BPF_K:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP | BPF_JSGE | BPF_K:
	case BPF_JMP | BPF_JSLT | BPF_K:
	case BPF_JMP | BPF_JSLE | BPF_K:
	case BPF_JMP32 | BPF_JEQ | BPF_K:
	case BPF_JMP32 | BPF_JNE | BPF_K:
	case BPF_JMP32 | BPF_JGT | BPF_K:
	case BPF_JMP32 | BPF_JGE | BPF_K:
	case BPF_JMP32 | BPF_JLT | BPF_K:
	case BPF_JMP32 | BPF_JLE | BPF_K:
	case BPF_JMP32 | BPF_JSGT | BPF_K:
	case BPF_JMP32 | BPF_JSGE | BPF_K:
	case BPF_JMP32 | BPF_JSLT | BPF_K:
	case BPF_JMP32 | BPF_JSLE | BPF_K:
		jmp_offset = bpf2la_offset(i, off, ctx);
		if (imm) {
			move_imm(ctx, t1, imm, false);
			tm = t1;
		} else {
			/* If imm is 0, simply use zero register. */
			tm = LOONGARCH_GPR_ZERO;
		}
		move_reg(ctx, t2, dst);
		if (is_signed_bpf_cond(BPF_OP(code))) {
			emit_sext_32(ctx, tm, is32);
			emit_sext_32(ctx, t2, is32);
		} else {
			emit_zext_32(ctx, tm, is32);
			emit_zext_32(ctx, t2, is32);
		}
		if (emit_cond_jmp(ctx, cond, t2, tm, jmp_offset) < 0)
			goto toofar;
		break;

	/* PC += off if dst & src */
	case BPF_JMP | BPF_JSET | BPF_X:
	case BPF_JMP32 | BPF_JSET | BPF_X:
		jmp_offset = bpf2la_offset(i, off, ctx);
		emit_insn(ctx, and, t1, dst, src);
		emit_zext_32(ctx, t1, is32);
		if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
			goto toofar;
		break;

	/* PC += off if dst & imm */
	case BPF_JMP | BPF_JSET | BPF_K:
	case BPF_JMP32 | BPF_JSET | BPF_K:
		jmp_offset = bpf2la_offset(i, off, ctx);
		move_imm(ctx, t1, imm, is32);
		emit_insn(ctx, and, t1, dst, t1);
		emit_zext_32(ctx, t1, is32);
		if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
			goto toofar;
		break;

	/* PC += off */
	case BPF_JMP | BPF_JA:
	case BPF_JMP32 | BPF_JA:
		if (BPF_CLASS(code) == BPF_JMP)
			jmp_offset = bpf2la_offset(i, off, ctx);
		else
			jmp_offset = bpf2la_offset(i, imm, ctx);
		if (emit_uncond_jmp(ctx, jmp_offset) < 0)
			goto toofar;
		break;

	/* function call */
	case BPF_JMP | BPF_CALL:
		mark_call(ctx);
		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
					    &func_addr, &func_addr_fixed);
		if (ret < 0)
			return ret;

		move_addr(ctx, t1, func_addr);
		emit_insn(ctx, jirl, LOONGARCH_GPR_RA, t1, 0);

		if (insn->src_reg != BPF_PSEUDO_CALL)
			move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0);

		break;

	/* tail call */
	case BPF_JMP | BPF_TAIL_CALL:
		mark_tail_call(ctx);
		if (emit_bpf_tail_call(ctx) < 0)
			return -EINVAL;
		break;

	/* function return */
	case BPF_JMP | BPF_EXIT:
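		/* The last BPF instruction falls through into the epilogue; only earlier exits need a jump */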
		if (i == ctx->prog->len - 1)
			break;

		jmp_offset = epilogue_offset(ctx);
		if (emit_uncond_jmp(ctx, jmp_offset) < 0)
			goto toofar;
		break;

	/* dst = imm64 */
	case BPF_LD | BPF_IMM | BPF_DW:
	{
		const u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm;

		if (bpf_pseudo_func(insn))
			move_addr(ctx, dst, imm64);
		else
			move_imm(ctx, dst, imm64, is32);
		return 1;
	}

	/* dst = *(size *)(src + off) */
	case BPF_LDX | BPF_MEM | BPF_B:
	case BPF_LDX | BPF_MEM | BPF_H:
	case BPF_LDX | BPF_MEM | BPF_W:
	case BPF_LDX | BPF_MEM | BPF_DW:
	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
	/* dst_reg = (s64)*(signed size *)(src_reg + off) */
	case BPF_LDX | BPF_MEMSX | BPF_B:
	case BPF_LDX | BPF_MEMSX | BPF_H:
	case BPF_LDX | BPF_MEMSX | BPF_W:
	case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
	case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
	case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
		sign_extend = BPF_MODE(insn->code) == BPF_MEMSX ||
			      BPF_MODE(insn->code) == BPF_PROBE_MEMSX;
		switch (BPF_SIZE(code)) {
		case BPF_B:
			if (is_signed_imm12(off)) {
				if (sign_extend)
					emit_insn(ctx, ldb, dst, src, off);
				else
					emit_insn(ctx, ldbu, dst, src, off);
			} else {
				move_imm(ctx, t1, off, is32);
				if (sign_extend)
					emit_insn(ctx, ldxb, dst, src, t1);
				else
					emit_insn(ctx, ldxbu, dst, src, t1);
			}
			break;
		case BPF_H:
			if (is_signed_imm12(off)) {
				if (sign_extend)
					emit_insn(ctx, ldh, dst, src, off);
				else
					emit_insn(ctx, ldhu, dst, src, off);
			} else {
				move_imm(ctx, t1, off, is32);
				if (sign_extend)
					emit_insn(ctx, ldxh, dst, src, t1);
				else
					emit_insn(ctx, ldxhu, dst, src, t1);
			}
			break;
		case BPF_W:
			if (is_signed_imm12(off)) {
				if (sign_extend)
					emit_insn(ctx, ldw, dst, src, off);
				else
					emit_insn(ctx, ldwu, dst, src, off);
			} else {
				move_imm(ctx, t1, off, is32);
				if (sign_extend)
					emit_insn(ctx, ldxw, dst, src, t1);
				else
					emit_insn(ctx, ldxwu, dst, src, t1);
			}
			break;
		case BPF_DW:
			move_imm(ctx, t1, off, is32);
			emit_insn(ctx, ldxd, dst, src, t1);
			break;
		}

		ret = add_exception_handler(insn, ctx, dst);
		if (ret)
			return ret;
		break;

	/* *(size *)(dst + off) = imm */
	case BPF_ST | BPF_MEM | BPF_B:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_DW:
		switch (BPF_SIZE(code)) {
		case BPF_B:
			move_imm(ctx, t1, imm, is32);
			if (is_signed_imm12(off)) {
				emit_insn(ctx, stb, t1, dst, off);
			} else {
				move_imm(ctx, t2, off, is32);
				emit_insn(ctx, stxb, t1, dst, t2);
			}
			break;
		case BPF_H:
			move_imm(ctx, t1, imm, is32);
			if (is_signed_imm12(off)) {
				emit_insn(ctx, sth, t1, dst, off);
			} else {
				move_imm(ctx, t2, off, is32);
				emit_insn(ctx, stxh, t1, dst, t2);
			}
			break;
		case BPF_W:
			move_imm(ctx, t1, imm, is32);
			if (is_signed_imm12(off)) {
				emit_insn(ctx, stw, t1, dst, off);
			} else if (is_signed_imm14(off)) {
				emit_insn(ctx, stptrw, t1, dst, off);
			} else {
				move_imm(ctx, t2, off, is32);
				emit_insn(ctx, stxw, t1, dst, t2);
			}
			break;
		case BPF_DW:
			move_imm(ctx, t1, imm, is32);
			if (is_signed_imm12(off)) {
				emit_insn(ctx, std, t1, dst, off);
			} else if (is_signed_imm14(off)) {
				emit_insn(ctx, stptrd, t1, dst, off);
			} else {
				move_imm(ctx, t2, off, is32);
				emit_insn(ctx, stxd, t1, dst, t2);
			}
			break;
		}
		break;

	/* *(size *)(dst + off) = src */
	case BPF_STX | BPF_MEM | BPF_B:
	case BPF_STX | BPF_MEM | BPF_H:
	case BPF_STX | BPF_MEM | BPF_W:
	case BPF_STX | BPF_MEM | BPF_DW:
		switch (BPF_SIZE(code)) {
		case BPF_B:
			if (is_signed_imm12(off)) {
				emit_insn(ctx, stb, src, dst, off);
			} else {
				move_imm(ctx, t1, off, is32);
				emit_insn(ctx, stxb, src, dst, t1);
			}
			break;
		case BPF_H:
			if (is_signed_imm12(off)) {
				emit_insn(ctx, sth, src, dst, off);
			} else {
				move_imm(ctx, t1, off, is32);
				emit_insn(ctx, stxh, src, dst, t1);
			}
			break;
		case BPF_W:
			if (is_signed_imm12(off)) {
				emit_insn(ctx, stw, src, dst, off);
			} else if (is_signed_imm14(off)) {
				emit_insn(ctx, stptrw, src, dst, off);
			} else {
				move_imm(ctx, t1, off, is32);
				emit_insn(ctx, stxw, src, dst, t1);
			}
			break;
		case BPF_DW:
			if (is_signed_imm12(off)) {
				emit_insn(ctx, std, src, dst, off);
			} else if (is_signed_imm14(off)) {
				emit_insn(ctx, stptrd, src, dst, off);
			} else {
				move_imm(ctx, t1, off, is32);
				emit_insn(ctx, stxd, src, dst, t1);
			}
			break;
		}
		break;

	case BPF_STX | BPF_ATOMIC | BPF_W:
	case BPF_STX | BPF_ATOMIC | BPF_DW:
		emit_atomic(insn, ctx);
		break;

	/* Speculation barrier */
	case BPF_ST | BPF_NOSPEC:
		break;

	default:
		pr_err("bpf_jit: unknown opcode %02x\n", code);
		return -EINVAL;
	}

	return 0;

toofar:
	pr_info_once("bpf_jit: opcode %02x, jump too far\n", code);
	return -E2BIG;
}

static int build_body(struct jit_ctx *ctx, bool extra_pass)
{
	int i;
	const struct bpf_prog *prog = ctx->prog;

	for (i = 0; i < prog->len; i++) {
		const struct bpf_insn *insn = &prog->insnsi[i];
		int ret;

		if (ctx->image == NULL)
			ctx->offset[i] = ctx->idx;

		ret = build_insn(insn, ctx, extra_pass);
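		/* A positive return means a 16-byte instruction (BPF_LD_IMM64) was consumed; record an offset for its second half too */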
		if (ret > 0) {
			i++;
			if (ctx->image == NULL)
				ctx->offset[i] = ctx->idx;
			continue;
		}
		if (ret)
			return ret;
	}

	if (ctx->image == NULL)
		ctx->offset[i] = ctx->idx;

	return 0;
}

/* Fill space with break instructions */
static void jit_fill_hole(void *area, unsigned int size)
{
	u32 *ptr;

	/* We are guaranteed to have aligned memory */
	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
		*ptr++ = INSN_BREAK;
}

static int validate_code(struct jit_ctx *ctx)
{
	int i;
	union loongarch_instruction insn;

	for (i = 0; i < ctx->idx; i++) {
		insn = ctx->image[i];
		/* Check INSN_BREAK */
		if (insn.word == INSN_BREAK)
			return -1;
	}

	if (WARN_ON_ONCE(ctx->num_exentries != ctx->prog->aux->num_exentries))
		return -1;

	return 0;
}

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	bool tmp_blinded = false, extra_pass = false;
	u8 *image_ptr;
	int image_size, prog_size, extable_size;
	struct jit_ctx ctx;
	struct jit_data *jit_data;
	struct bpf_binary_header *header;
	struct bpf_prog *tmp, *orig_prog = prog;

	/*
	 * If BPF JIT was not enabled then we must fall back to
	 * the interpreter.
	 */
	if (!prog->jit_requested)
		return orig_prog;

	tmp = bpf_jit_blind_constants(prog);
	/*
	 * If blinding was requested and we failed during blinding,
	 * we must fall back to the interpreter. Otherwise, we save
	 * the new JITed code.
	 */
	if (IS_ERR(tmp))
		return orig_prog;

	if (tmp != prog) {
		tmp_blinded = true;
		prog = tmp;
	}

	jit_data = prog->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
		if (!jit_data) {
			prog = orig_prog;
			goto out;
		}
		prog->aux->jit_data = jit_data;
	}
	if (jit_data->ctx.offset) {
		ctx = jit_data->ctx;
		image_ptr = jit_data->image;
		header = jit_data->header;
		extra_pass = true;
		prog_size = sizeof(u32) * ctx.idx;
		goto skip_init_ctx;
	}

	memset(&ctx, 0, sizeof(ctx));
	ctx.prog = prog;

	ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL);
	if (ctx.offset == NULL) {
		prog = orig_prog;
		goto out_offset;
	}

	/* 1. Initial fake pass to compute ctx->idx and set ctx->flags */
	build_prologue(&ctx);
	if (build_body(&ctx, extra_pass)) {
		prog = orig_prog;
		goto out_offset;
	}
	ctx.epilogue_offset = ctx.idx;
	build_epilogue(&ctx);

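	/* The exception table is appended directly after the JITed image */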
	extable_size = prog->aux->num_exentries * sizeof(struct exception_table_entry);

	/* Now we know the actual image size.
	 * Each LoongArch instruction is 32 bits wide, so translate the
	 * number of JITed instructions into the size required to store
	 * the JITed code.
	 */
	prog_size = sizeof(u32) * ctx.idx;
	image_size = prog_size + extable_size;
	/* Now we know the size of the structure to make */
	header = bpf_jit_binary_alloc(image_size, &image_ptr,
				      sizeof(u32), jit_fill_hole);
	if (header == NULL) {
		prog = orig_prog;
		goto out_offset;
	}

	/* 2. Now, the actual pass to generate final JIT code */
	ctx.image = (union loongarch_instruction *)image_ptr;
	if (extable_size)
		prog->aux->extable = (void *)image_ptr + prog_size;

skip_init_ctx:
	ctx.idx = 0;
	ctx.num_exentries = 0;

	build_prologue(&ctx);
	if (build_body(&ctx, extra_pass)) {
		bpf_jit_binary_free(header);
		prog = orig_prog;
		goto out_offset;
	}
	build_epilogue(&ctx);

	/* 3. Extra pass to validate JITed code */
	if (validate_code(&ctx)) {
		bpf_jit_binary_free(header);
		prog = orig_prog;
		goto out_offset;
	}

	/* And we're done */
	if (bpf_jit_enable > 1)
		bpf_jit_dump(prog->len, prog_size, 2, ctx.image);

	/* Update the icache */
	flush_icache_range((unsigned long)header, (unsigned long)(ctx.image + ctx.idx));

	if (!prog->is_func || extra_pass) {
		int err;

		if (extra_pass && ctx.idx != jit_data->ctx.idx) {
			pr_err_once("multi-func JIT bug %d != %d\n",
				    ctx.idx, jit_data->ctx.idx);
			goto out_free;
		}
		err = bpf_jit_binary_lock_ro(header);
		if (err) {
			pr_err_once("bpf_jit_binary_lock_ro() returned %d\n",
				    err);
			goto out_free;
		}
	} else {
		jit_data->ctx = ctx;
		jit_data->image = image_ptr;
		jit_data->header = header;
	}
	prog->jited = 1;
	prog->jited_len = prog_size;
	prog->bpf_func = (void *)ctx.image;

	if (!prog->is_func || extra_pass) {
		int i;

		/* offset[prog->len] is the size of program */
		for (i = 0; i <= prog->len; i++)
			ctx.offset[i] *= LOONGARCH_INSN_SIZE;
		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);

out_offset:
		kvfree(ctx.offset);
		kfree(jit_data);
		prog->aux->jit_data = NULL;
	}

out:
	if (tmp_blinded)
		bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog);

	out_offset = -1;

	return prog;

out_free:
	bpf_jit_binary_free(header);
	prog->bpf_func = NULL;
	prog->jited = 0;
	prog->jited_len = 0;
	goto out_offset;
}

/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
bool bpf_jit_supports_subprog_tailcalls(void)
{
	return true;
}