// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <net/gso.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack_acct.h>
/* For layer 4 checksum field offset. */
#include <linux/tcp.h>
#include <linux/udp.h>

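/* Track TCP state transitions on the fast path: tear down the flow if a new
 * SYN arrives while the flow is already marked as closing, and mark the flow
 * as closing once a FIN or RST is seen.
 */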
static int nf_flow_state_check(struct flow_offload *flow, int proto,
			       struct sk_buff *skb, unsigned int thoff)
{
	struct tcphdr *tcph;

	if (proto != IPPROTO_TCP)
		return 0;

	tcph = (void *)(skb_network_header(skb) + thoff);
	if (tcph->syn && test_bit(NF_FLOW_CLOSING, &flow->flags)) {
		flow_offload_teardown(flow);
		return -1;
	}

	if ((tcph->fin || tcph->rst) &&
	    !test_bit(NF_FLOW_CLOSING, &flow->flags))
		set_bit(NF_FLOW_CLOSING, &flow->flags);

	return 0;
}

static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
			       __be32 addr, __be32 new_addr)
{
	struct tcphdr *tcph;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
}

static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
			       __be32 addr, __be32 new_addr)
{
	struct udphdr *udph;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace4(&udph->check, skb, addr,
					 new_addr, true);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}
}

static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
				   unsigned int thoff, __be32 addr,
				   __be32 new_addr)
{
	switch (iph->protocol) {
	case IPPROTO_TCP:
		nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
		break;
	case IPPROTO_UDP:
		nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
		break;
	}
}

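/* Rewrite the source address (original direction) or destination address
 * (reply direction) according to the flow's SNAT mapping, fixing up the IP
 * and layer 4 checksums.
 */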
static void nf_flow_snat_ip(const struct flow_offload *flow,
			    struct sk_buff *skb, struct iphdr *iph,
			    unsigned int thoff, enum flow_offload_tuple_dir dir)
{
	__be32 addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = iph->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
		iph->saddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = iph->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
		iph->daddr = new_addr;
		break;
	}
	csum_replace4(&iph->check, addr, new_addr);

	nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}

static void nf_flow_dnat_ip(const struct flow_offload *flow,
			    struct sk_buff *skb, struct iphdr *iph,
			    unsigned int thoff, enum flow_offload_tuple_dir dir)
{
	__be32 addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = iph->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
		iph->daddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = iph->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
		iph->saddr = new_addr;
		break;
	}
	csum_replace4(&iph->check, addr, new_addr);

	nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}

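/* Apply the SNAT and/or DNAT mappings configured for this flow to the
 * layer 4 ports and the IPv4 addresses.
 */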
static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
			   unsigned int thoff, enum flow_offload_tuple_dir dir,
			   struct iphdr *iph)
{
	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
		nf_flow_snat_ip(flow, skb, iph, thoff, dir);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
		nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
	}
}

static bool ip_has_options(unsigned int thoff)
{
	return thoff != sizeof(struct iphdr);
}

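/* Record the packet's encapsulation in the lookup tuple: a hardware
 * accelerated VLAN tag first, then the VLAN tag or PPPoE session id found in
 * the packet headers.
 */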
static void nf_flow_tuple_encap(struct sk_buff *skb,
				struct flow_offload_tuple *tuple)
{
	struct vlan_ethhdr *veth;
	struct pppoe_hdr *phdr;
	int i = 0;

	if (skb_vlan_tag_present(skb)) {
		tuple->encap[i].id = skb_vlan_tag_get(skb);
		tuple->encap[i].proto = skb->vlan_proto;
		i++;
	}
	switch (skb->protocol) {
	case htons(ETH_P_8021Q):
		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
		tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
		tuple->encap[i].proto = skb->protocol;
		break;
	case htons(ETH_P_PPP_SES):
		phdr = (struct pppoe_hdr *)skb_network_header(skb);
		tuple->encap[i].id = ntohs(phdr->sid);
		tuple->encap[i].proto = skb->protocol;
		break;
	}
}

struct nf_flowtable_ctx {
	const struct net_device	*in;
	u32			offset;
	u32			hdrsize;
};

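/* Extract the IPv4 flow tuple used for the flowtable lookup. Returns -1 if
 * the packet cannot be handled by the fast path: fragments, IP options,
 * unsupported layer 4 protocol, TTL about to expire, or truncated headers.
 */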
static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
			    struct flow_offload_tuple *tuple)
{
	struct flow_ports *ports;
	unsigned int thoff;
	struct iphdr *iph;
	u8 ipproto;

	if (!pskb_may_pull(skb, sizeof(*iph) + ctx->offset))
		return -1;

	iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
	thoff = (iph->ihl * 4);

	if (ip_is_fragment(iph) ||
	    unlikely(ip_has_options(thoff)))
		return -1;

	thoff += ctx->offset;

	ipproto = iph->protocol;
	switch (ipproto) {
	case IPPROTO_TCP:
		ctx->hdrsize = sizeof(struct tcphdr);
		break;
	case IPPROTO_UDP:
		ctx->hdrsize = sizeof(struct udphdr);
		break;
#ifdef CONFIG_NF_CT_PROTO_GRE
	case IPPROTO_GRE:
		ctx->hdrsize = sizeof(struct gre_base_hdr);
		break;
#endif
	default:
		return -1;
	}

	if (iph->ttl <= 1)
		return -1;

	if (!pskb_may_pull(skb, thoff + ctx->hdrsize))
		return -1;

	switch (ipproto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
		tuple->src_port = ports->source;
		tuple->dst_port = ports->dest;
		break;
	case IPPROTO_GRE: {
		struct gre_base_hdr *greh;

		greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
		if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
			return -1;
		break;
	}
	}

	iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);

	tuple->src_v4.s_addr = iph->saddr;
	tuple->dst_v4.s_addr = iph->daddr;
	tuple->l3proto = AF_INET;
	tuple->l4proto = ipproto;
	tuple->iifidx = ctx->in->ifindex;
	nf_flow_tuple_encap(skb, tuple);

	return 0;
}

/* Based on ip_exceeds_mtu(). */
static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

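/* Revalidate the cached route before reusing it; only neigh and xfrm xmit
 * types carry a dst to check.
 */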
static inline bool nf_flow_dst_check(struct flow_offload_tuple *tuple)
{
	if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_NEIGH &&
	    tuple->xmit_type != FLOW_OFFLOAD_XMIT_XFRM)
		return true;

	return dst_check(tuple->dst_cache, tuple->dst_cookie);
}

static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
				      const struct nf_hook_state *state,
				      struct dst_entry *dst)
{
	skb_orphan(skb);
	skb_dst_set_noref(skb, dst);
	dst_output(state->net, state->sk, skb);
	return NF_STOLEN;
}

static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
				       u32 *offset)
{
	struct vlan_ethhdr *veth;
	__be16 inner_proto;

	switch (skb->protocol) {
	case htons(ETH_P_8021Q):
		if (!pskb_may_pull(skb, skb_mac_offset(skb) + sizeof(*veth)))
			return false;

		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
		if (veth->h_vlan_encapsulated_proto == proto) {
			*offset += VLAN_HLEN;
			return true;
		}
		break;
	case htons(ETH_P_PPP_SES):
		if (nf_flow_pppoe_proto(skb, &inner_proto) &&
		    inner_proto == proto) {
			*offset += PPPOE_SES_HLEN;
			return true;
		}
		break;
	}

	return false;
}

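/* Strip the encapsulation recorded in the tuple (hardware VLAN tag, VLAN
 * header, PPPoE session header) before the packet is forwarded.
 */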
static void nf_flow_encap_pop(struct sk_buff *skb,
			      struct flow_offload_tuple_rhash *tuplehash)
{
	struct vlan_hdr *vlan_hdr;
	int i;

	for (i = 0; i < tuplehash->tuple.encap_num; i++) {
		if (skb_vlan_tag_present(skb)) {
			__vlan_hwaccel_clear_tag(skb);
			continue;
		}
		switch (skb->protocol) {
		case htons(ETH_P_8021Q):
			vlan_hdr = (struct vlan_hdr *)skb->data;
			__skb_pull(skb, VLAN_HLEN);
			vlan_set_encap_proto(skb, vlan_hdr);
			skb_reset_network_header(skb);
			break;
		case htons(ETH_P_PPP_SES):
			skb->protocol = __nf_flow_pppoe_proto(skb);
			skb_pull(skb, PPPOE_SES_HLEN);
			skb_reset_network_header(skb);
			break;
		}
	}
}

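/* Direct xmit path: build the link-layer header from the addresses cached in
 * the tuple and queue the packet on the output device.
 */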
static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
				       const struct flow_offload_tuple_rhash *tuplehash,
				       unsigned short type)
{
	struct net_device *outdev;

	outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
	if (!outdev)
		return NF_DROP;

	skb->dev = outdev;
	dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
			tuplehash->tuple.out.h_source, skb->len);
	dev_queue_xmit(skb);

	return NF_STOLEN;
}

static struct flow_offload_tuple_rhash *
nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx,
		       struct nf_flowtable *flow_table, struct sk_buff *skb)
{
	struct flow_offload_tuple tuple = {};

	if (skb->protocol != htons(ETH_P_IP) &&
	    !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &ctx->offset))
		return NULL;

	if (nf_flow_tuple_ip(ctx, skb, &tuple) < 0)
		return NULL;

	return flow_offload_lookup(flow_table, &tuple);
}

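/* Forward an IPv4 packet that matched a flowtable entry: validate MTU, TCP
 * state and the cached route, refresh the flow, pop any encapsulation, apply
 * NAT and decrement the TTL. Returns 1 to transmit, 0 to fall back to the
 * classic forwarding path, -1 to drop.
 */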
static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
				   struct nf_flowtable *flow_table,
				   struct flow_offload_tuple_rhash *tuplehash,
				   struct sk_buff *skb)
{
	enum flow_offload_tuple_dir dir;
	struct flow_offload *flow;
	unsigned int thoff, mtu;
	struct iphdr *iph;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);

	mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
	if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
		return 0;

	iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
	thoff = (iph->ihl * 4) + ctx->offset;
	if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
		return 0;

	if (!nf_flow_dst_check(&tuplehash->tuple)) {
		flow_offload_teardown(flow);
		return 0;
	}

	if (skb_try_make_writable(skb, thoff + ctx->hdrsize))
		return -1;

	flow_offload_refresh(flow_table, flow, false);

	nf_flow_encap_pop(skb, tuplehash);
	thoff -= ctx->offset;

	iph = ip_hdr(skb);
	nf_flow_nat_ip(flow, skb, thoff, dir, iph);

	ip_decrease_ttl(iph);
	skb_clear_tstamp(skb);

	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);

	return 1;
}

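/* Netfilter ingress hook for IPv4 flowtable offload: look up the flow, let
 * nf_flow_offload_forward() mangle the packet, then transmit it via the
 * xfrm, neighbour or direct xmit path recorded in the tuple.
 */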
unsigned int
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
			const struct nf_hook_state *state)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table = priv;
	enum flow_offload_tuple_dir dir;
	struct nf_flowtable_ctx ctx = {
		.in = state->in,
	};
	struct flow_offload *flow;
	struct net_device *outdev;
	struct rtable *rt;
	__be32 nexthop;
	int ret;

	tuplehash = nf_flow_offload_lookup(&ctx, flow_table, skb);
	if (!tuplehash)
		return NF_ACCEPT;

	ret = nf_flow_offload_forward(&ctx, flow_table, tuplehash, skb);
	if (ret < 0)
		return NF_DROP;
	else if (ret == 0)
		return NF_ACCEPT;

	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
		rt = dst_rtable(tuplehash->tuple.dst_cache);
		memset(skb->cb, 0, sizeof(struct inet_skb_parm));
		IPCB(skb)->iif = skb->dev->ifindex;
		IPCB(skb)->flags = IPSKB_FORWARDED;
		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
	}

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);

	switch (tuplehash->tuple.xmit_type) {
	case FLOW_OFFLOAD_XMIT_NEIGH:
		rt = dst_rtable(tuplehash->tuple.dst_cache);
		outdev = rt->dst.dev;
		skb->dev = outdev;
		nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
		skb_dst_set_noref(skb, &rt->dst);
		neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
		ret = NF_STOLEN;
		break;
	case FLOW_OFFLOAD_XMIT_DIRECT:
		ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
		if (ret == NF_DROP)
			flow_offload_teardown(flow);
		break;
	default:
		WARN_ON_ONCE(1);
		ret = NF_DROP;
		break;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);

static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
				 struct in6_addr *addr,
				 struct in6_addr *new_addr,
				 struct ipv6hdr *ip6h)
{
	struct tcphdr *tcph;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
				  new_addr->s6_addr32, true);
}

static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
				 struct in6_addr *addr,
				 struct in6_addr *new_addr)
{
	struct udphdr *udph;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
					  new_addr->s6_addr32, true);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}
}

static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
				     unsigned int thoff, struct in6_addr *addr,
				     struct in6_addr *new_addr)
{
	switch (ip6h->nexthdr) {
	case IPPROTO_TCP:
		nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
		break;
	case IPPROTO_UDP:
		nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
		break;
	}
}

static void nf_flow_snat_ipv6(const struct flow_offload *flow,
			      struct sk_buff *skb, struct ipv6hdr *ip6h,
			      unsigned int thoff,
			      enum flow_offload_tuple_dir dir)
{
	struct in6_addr addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = ip6h->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
		ip6h->saddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = ip6h->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
		ip6h->daddr = new_addr;
		break;
	}

	nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
			      struct sk_buff *skb, struct ipv6hdr *ip6h,
			      unsigned int thoff,
			      enum flow_offload_tuple_dir dir)
{
	struct in6_addr addr, new_addr;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = ip6h->daddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
		ip6h->daddr = new_addr;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = ip6h->saddr;
		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
		ip6h->saddr = new_addr;
		break;
	}

	nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

static void nf_flow_nat_ipv6(const struct flow_offload *flow,
			     struct sk_buff *skb,
			     enum flow_offload_tuple_dir dir,
			     struct ipv6hdr *ip6h)
{
	unsigned int thoff = sizeof(*ip6h);

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
		nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
		nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
	}
}

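/* Extract the IPv6 flow tuple used for the flowtable lookup. Returns -1 if
 * the layer 4 protocol is not supported, the hop limit is about to expire,
 * or the headers cannot be pulled.
 */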
static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
			      struct flow_offload_tuple *tuple)
{
	struct flow_ports *ports;
	struct ipv6hdr *ip6h;
	unsigned int thoff;
	u8 nexthdr;

	thoff = sizeof(*ip6h) + ctx->offset;
	if (!pskb_may_pull(skb, thoff))
		return -1;

	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);

	nexthdr = ip6h->nexthdr;
	switch (nexthdr) {
	case IPPROTO_TCP:
		ctx->hdrsize = sizeof(struct tcphdr);
		break;
	case IPPROTO_UDP:
		ctx->hdrsize = sizeof(struct udphdr);
		break;
#ifdef CONFIG_NF_CT_PROTO_GRE
	case IPPROTO_GRE:
		ctx->hdrsize = sizeof(struct gre_base_hdr);
		break;
#endif
	default:
		return -1;
	}

	if (ip6h->hop_limit <= 1)
		return -1;

	if (!pskb_may_pull(skb, thoff + ctx->hdrsize))
		return -1;

	switch (nexthdr) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
		tuple->src_port = ports->source;
		tuple->dst_port = ports->dest;
		break;
	case IPPROTO_GRE: {
		struct gre_base_hdr *greh;

		greh = (struct gre_base_hdr *)(skb_network_header(skb) + thoff);
		if ((greh->flags & GRE_VERSION) != GRE_VERSION_0)
			return -1;
		break;
	}
	}

	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);

	tuple->src_v6 = ip6h->saddr;
	tuple->dst_v6 = ip6h->daddr;
	tuple->l3proto = AF_INET6;
	tuple->l4proto = nexthdr;
	tuple->iifidx = ctx->in->ifindex;
	nf_flow_tuple_encap(skb, tuple);

	return 0;
}

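/* IPv6 counterpart of nf_flow_offload_forward(): validate the packet and the
 * cached route, refresh the flow, pop encapsulation, apply NAT and decrement
 * the hop limit. Returns 1 to transmit, 0 to fall back to the classic path,
 * -1 to drop.
 */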
static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
					struct nf_flowtable *flow_table,
					struct flow_offload_tuple_rhash *tuplehash,
					struct sk_buff *skb)
{
	enum flow_offload_tuple_dir dir;
	struct flow_offload *flow;
	unsigned int thoff, mtu;
	struct ipv6hdr *ip6h;

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);

	mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
	if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
		return 0;

	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);
	thoff = sizeof(*ip6h) + ctx->offset;
	if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
		return 0;

	if (!nf_flow_dst_check(&tuplehash->tuple)) {
		flow_offload_teardown(flow);
		return 0;
	}

	if (skb_try_make_writable(skb, thoff + ctx->hdrsize))
		return -1;

	flow_offload_refresh(flow_table, flow, false);

	nf_flow_encap_pop(skb, tuplehash);

	ip6h = ipv6_hdr(skb);
	nf_flow_nat_ipv6(flow, skb, dir, ip6h);

	ip6h->hop_limit--;
	skb_clear_tstamp(skb);

	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);

	return 1;
}

static struct flow_offload_tuple_rhash *
nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx,
			    struct nf_flowtable *flow_table,
			    struct sk_buff *skb)
{
	struct flow_offload_tuple tuple = {};

	if (skb->protocol != htons(ETH_P_IPV6) &&
	    !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &ctx->offset))
		return NULL;

	if (nf_flow_tuple_ipv6(ctx, skb, &tuple) < 0)
		return NULL;

	return flow_offload_lookup(flow_table, &tuple);
}

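/* Netfilter ingress hook for IPv6 flowtable offload: look up the flow, let
 * nf_flow_offload_ipv6_forward() mangle the packet, then transmit it via the
 * xfrm, neighbour or direct xmit path recorded in the tuple.
 */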
unsigned int
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
			  const struct nf_hook_state *state)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table = priv;
	enum flow_offload_tuple_dir dir;
	struct nf_flowtable_ctx ctx = {
		.in = state->in,
	};
	const struct in6_addr *nexthop;
	struct flow_offload *flow;
	struct net_device *outdev;
	struct rt6_info *rt;
	int ret;

	tuplehash = nf_flow_offload_ipv6_lookup(&ctx, flow_table, skb);
	if (tuplehash == NULL)
		return NF_ACCEPT;

	ret = nf_flow_offload_ipv6_forward(&ctx, flow_table, tuplehash, skb);
	if (ret < 0)
		return NF_DROP;
	else if (ret == 0)
		return NF_ACCEPT;

	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
		rt = dst_rt6_info(tuplehash->tuple.dst_cache);
		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
		IP6CB(skb)->iif = skb->dev->ifindex;
		IP6CB(skb)->flags = IP6SKB_FORWARDED;
		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
	}

	dir = tuplehash->tuple.dir;
	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);

	switch (tuplehash->tuple.xmit_type) {
	case FLOW_OFFLOAD_XMIT_NEIGH:
		rt = dst_rt6_info(tuplehash->tuple.dst_cache);
		outdev = rt->dst.dev;
		skb->dev = outdev;
		nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
		skb_dst_set_noref(skb, &rt->dst);
		neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
		ret = NF_STOLEN;
		break;
	case FLOW_OFFLOAD_XMIT_DIRECT:
		ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
		if (ret == NF_DROP)
			flow_offload_teardown(flow);
		break;
	default:
		WARN_ON_ONCE(1);
		ret = NF_DROP;
		break;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);