// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The Internet Protocol (IP) module.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Donald Becker, <becker@super.org>
 *		Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *		Richard Underwood
 *		Stefan Becker, <stefanb@yello.ping.de>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *
 * Fixes:
 *		Alan Cox	:	Commented a couple of minor bits of surplus code
 *		Alan Cox	:	Undefining IP_FORWARD doesn't include the code
 *					(just stops a compiler warning).
 *		Alan Cox	:	Frames with >=MAX_ROUTE record routes, strict routes or loose routes
 *					are junked rather than corrupting things.
 *		Alan Cox	:	Frames to bad broadcast subnets are dumped
 *					We used to process them non broadcast and
 *					boy could that cause havoc.
 *		Alan Cox	:	ip_forward sets the free flag on the
 *					new frame it queues. Still crap because
 *					it copies the frame but at least it
 *					doesn't eat memory too.
 *		Alan Cox	:	Generic queue code and memory fixes.
 *		Fred Van Kempen	:	IP fragment support (borrowed from NET2E)
 *		Gerhard Koerting:	Forward fragmented frames correctly.
 *		Gerhard Koerting:	Fixes to my fix of the above 8-).
 *		Gerhard Koerting:	IP interface addressing fix.
 *		Linus Torvalds	:	More robustness checks
 *		Alan Cox	:	Even more checks: Still not as robust as it ought to be
 *		Alan Cox	:	Save IP header pointer for later
 *		Alan Cox	:	ip option setting
 *		Alan Cox	:	Use ip_tos/ip_ttl settings
 *		Alan Cox	:	Fragmentation bogosity removed
 *					(Thanks to Mark.Bush@prg.ox.ac.uk)
 *		Dmitry Gorodchanin :	Send of a raw packet crash fix.
 *		Alan Cox	:	Silly ip bug when an overlength
 *					fragment turns up. Now frees the
 *					queue.
 *		Linus Torvalds/ :	Memory leakage on fragmentation
 *		Alan Cox	:	handling.
 *		Gerhard Koerting:	Forwarding uses IP priority hints
 *		Teemu Rantanen	:	Fragment problems.
 *		Alan Cox	:	General cleanup, comments and reformat
 *		Alan Cox	:	SNMP statistics
 *		Alan Cox	:	BSD address rule semantics. Also see
 *					UDP as there is a nasty checksum issue
 *					if you do things the wrong way.
 *		Alan Cox	:	Always defrag, moved IP_FORWARD to the config.in file
 *		Alan Cox	:	IP options adjust sk->priority.
 *		Pedro Roque	:	Fix mtu/length error in ip_forward.
 *		Alan Cox	:	Avoid ip_chk_addr when possible.
 *	Richard Underwood	:	IP multicasting.
 *		Alan Cox	:	Cleaned up multicast handlers.
 *		Alan Cox	:	RAW sockets demultiplex in the BSD style.
 *		Gunther Mayer	:	Fix the SNMP reporting typo
 *		Alan Cox	:	Always in group 224.0.0.1
 *	Pauline Middelink	:	Fast ip_checksum update when forwarding
 *					Masquerading support.
 *		Alan Cox	:	Multicast loopback error for 224.0.0.1
 *		Alan Cox	:	IP_MULTICAST_LOOP option.
 *		Alan Cox	:	Use notifiers.
 *		Bjorn Ekwall	:	Removed ip_csum (from slhc.c too)
 *		Bjorn Ekwall	:	Moved ip_fast_csum to ip.h (inline!)
 *		Stefan Becker	:	Send out ICMP HOST REDIRECT
 *	Arnt Gulbrandsen	:	ip_build_xmit
 *		Alan Cox	:	Per socket routing cache
 *		Alan Cox	:	Fixed routing cache, added header cache.
 *		Alan Cox	:	Loopback didn't work right in original ip_build_xmit - fixed it.
 *		Alan Cox	:	Only send ICMP_REDIRECT if src/dest are the same net.
 *		Alan Cox	:	Incoming IP option handling.
 *		Alan Cox	:	Set saddr on raw output frames as per BSD.
 *		Alan Cox	:	Stopped broadcast source route explosions.
 *		Alan Cox	:	Can disable source routing
 *		Takeshi Sone	:	Masquerading didn't work.
 *	Dave Bonn,Alan Cox	:	Faster IP forwarding whenever possible.
 *		Alan Cox	:	Memory leaks, tramples, misc debugging.
 *		Alan Cox	:	Fixed multicast (by popular demand 8))
 *		Alan Cox	:	Fixed forwarding (by even more popular demand 8))
 *		Alan Cox	:	Fixed SNMP statistics [I think]
 *		Gerhard Koerting:	IP fragmentation forwarding fix
 *		Alan Cox	:	Device lock against page fault.
 *		Alan Cox	:	IP_HDRINCL facility.
 *	Werner Almesberger	:	Zero fragment bug
 *		Alan Cox	:	RAW IP frame length bug
 *		Alan Cox	:	Outgoing firewall on build_xmit
 *		A.N.Kuznetsov	:	IP_OPTIONS support throughout the kernel
 *		Alan Cox	:	Multicast routing hooks
 *		Jos Vos		:	Do accounting *before* call_in_firewall
 *	Willy Konynenberg	:	Transparent proxying support
 *
 * To Fix:
 *		IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient
 *		and could be made very efficient with the addition of some virtual memory hacks to permit
 *		the allocation of a buffer that can then be 'grown' by twiddling page tables.
 *		Output fragmentation wants updating along with the buffer management to use a single
 *		interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet
 *		output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause
 *		fragmentation anyway.
 */

#define pr_fmt(fmt) "IPv4: " fmt

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/slab.h>

#include <linux/net.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/indirect_call_wrapper.h>

#include <net/snmp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/arp.h>
#include <net/icmp.h>
#include <net/raw.h>
#include <net/checksum.h>
#include <net/inet_ecn.h>
#include <linux/netfilter_ipv4.h>
#include <net/xfrm.h>
#include <linux/mroute.h>
#include <linux/netlink.h>
#include <net/dst_metadata.h>

/*
 *	Process Router Attention IP option (RFC 2113)
 */
bool ip_call_ra_chain(struct sk_buff *skb)
{
	struct ip_ra_chain *ra;
	u8 protocol = ip_hdr(skb)->protocol;
	struct sock *last = NULL;
	struct net_device *dev = skb->dev;
	struct net *net = dev_net(dev);

	for (ra = rcu_dereference(net->ipv4.ra_chain); ra; ra = rcu_dereference(ra->next)) {
		struct sock *sk = ra->sk;

		/* If socket is bound to an interface, only report
		 * the packet if it came from that interface.
		 */
		if (sk && inet_sk(sk)->inet_num == protocol &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == dev->ifindex)) {
			if (ip_is_fragment(ip_hdr(skb))) {
				if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN))
					return true;
			}
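			/* Deliver a clone to every matching socket found
			 * so far; the final match receives the original
			 * skb below, which saves one clone.
			 */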
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					raw_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		raw_rcv(last, skb);
		return true;
	}
	return false;
}

INDIRECT_CALLABLE_DECLARE(int udp_rcv(struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int tcp_v4_rcv(struct sk_buff *));
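/* Hand the packet to the registered handler for its L4 protocol.
 * A negative handler return value names an encapsulated protocol and
 * restarts delivery for it (the resubmit loop).  Must run inside an
 * RCU read-side critical section, since the protocol table is
 * RCU-protected.
 */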
void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int protocol)
{
	const struct net_protocol *ipprot;
	int raw, ret;

resubmit:
	raw = raw_local_deliver(skb, protocol);

	ipprot = rcu_dereference(inet_protos[protocol]);
	if (ipprot) {
		if (!ipprot->no_policy) {
			if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
				kfree_skb(skb);
				return;
			}
			nf_reset_ct(skb);
		}
		ret = INDIRECT_CALL_2(ipprot->handler, tcp_v4_rcv, udp_rcv,
				      skb);
		if (ret < 0) {
			protocol = -ret;
			goto resubmit;
		}
		__IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
	} else {
		if (!raw) {
			if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
				__IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS);
				icmp_send(skb, ICMP_DEST_UNREACH,
					  ICMP_PROT_UNREACH, 0);
			}
			kfree_skb(skb);
		} else {
			__IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
			consume_skb(skb);
		}
	}
}

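/* Strip the IP header and deliver the payload to the protocol layer
 * under the RCU read lock.
 */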
static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	__skb_pull(skb, skb_network_header_len(skb));

	rcu_read_lock();
	ip_protocol_deliver_rcu(net, skb, ip_hdr(skb)->protocol);
	rcu_read_unlock();

	return 0;
}

/*
 *	Deliver IP Packets to the higher protocol layers.
 */
int ip_local_deliver(struct sk_buff *skb)
{
	/*
	 *	Reassemble IP fragments.
	 */
	struct net *net = dev_net(skb->dev);

	if (ip_is_fragment(ip_hdr(skb))) {
		if (ip_defrag(net, skb, IP_DEFRAG_LOCAL_DELIVER))
			return 0;
	}

	return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN,
		       net, NULL, skb, skb->dev, NULL,
		       ip_local_deliver_finish);
}
EXPORT_SYMBOL(ip_local_deliver);

static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_options *opt;
	const struct iphdr *iph;

	/* This looks like overkill, because not all
	   IP options require packet mangling.
	   But it is the easiest approach for now, especially
	   since the combination of IP options and a running
	   sniffer is an extremely rare condition.
					      --ANK (980813)
	*/
	if (skb_cow(skb, skb_headroom(skb))) {
		__IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	iph = ip_hdr(skb);
	opt = &(IPCB(skb)->opt);
	opt->optlen = iph->ihl*4 - sizeof(struct iphdr);

	if (ip_options_compile(dev_net(dev), opt, skb)) {
		__IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
		goto drop;
	}

	if (unlikely(opt->srr)) {
		struct in_device *in_dev = __in_dev_get_rcu(dev);

		if (in_dev) {
			if (!IN_DEV_SOURCE_ROUTE(in_dev)) {
				if (IN_DEV_LOG_MARTIANS(in_dev))
					net_info_ratelimited("source route option %pI4 -> %pI4\n",
							     &iph->saddr,
							     &iph->daddr);
				goto drop;
			}
		}

		if (ip_options_rcv_srr(skb, dev))
			goto drop;
	}

	return false;
drop:
	return true;
}

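/* A dst cached from an earlier packet in the same receive batch can
 * be reused only while this packet has no dst of its own yet and
 * matches the hint packet's daddr and tos.
 */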
static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph,
			    const struct sk_buff *hint)
{
	return hint && !skb_dst(skb) && ip_hdr(hint)->daddr == iph->daddr &&
	       ip_hdr(hint)->tos == iph->tos;
}

INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *));
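/* Finish receive processing for one packet: reuse the batch route
 * hint when possible, try early demux to find the owning socket,
 * fall back to a full route lookup if the packet still has no dst,
 * and account multicast/broadcast statistics.
 */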
static int ip_rcv_finish_core(struct net *net, struct sock *sk,
			      struct sk_buff *skb, struct net_device *dev,
			      const struct sk_buff *hint)
{
	const struct iphdr *iph = ip_hdr(skb);
	int (*edemux)(struct sk_buff *skb);
	struct rtable *rt;
	int err;

	if (ip_can_use_hint(skb, iph, hint)) {
		err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos,
					dev, hint);
		if (unlikely(err))
			goto drop_error;
	}

	if (net->ipv4.sysctl_ip_early_demux &&
	    !skb_dst(skb) &&
	    !skb->sk &&
	    !ip_is_fragment(iph)) {
		const struct net_protocol *ipprot;
		int protocol = iph->protocol;

		ipprot = rcu_dereference(inet_protos[protocol]);
		if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
			err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux,
					      udp_v4_early_demux, skb);
			if (unlikely(err))
				goto drop_error;
			/* must reload iph, skb->head might have changed */
			iph = ip_hdr(skb);
		}
	}

	/*
	 *	Initialise the virtual path cache for the packet. It describes
	 *	how the packet travels inside Linux networking.
	 */
	if (!skb_valid_dst(skb)) {
		err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
					   iph->tos, dev);
		if (unlikely(err))
			goto drop_error;
	}

#ifdef CONFIG_IP_ROUTE_CLASSID
	if (unlikely(skb_dst(skb)->tclassid)) {
		struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
		u32 idx = skb_dst(skb)->tclassid;
		st[idx&0xFF].o_packets++;
		st[idx&0xFF].o_bytes += skb->len;
		st[(idx>>16)&0xFF].i_packets++;
		st[(idx>>16)&0xFF].i_bytes += skb->len;
	}
#endif

	if (iph->ihl > 5 && ip_rcv_options(skb, dev))
		goto drop;

	rt = skb_rtable(skb);
	if (rt->rt_type == RTN_MULTICAST) {
		__IP_UPD_PO_STATS(net, IPSTATS_MIB_INMCAST, skb->len);
	} else if (rt->rt_type == RTN_BROADCAST) {
		__IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len);
	} else if (skb->pkt_type == PACKET_BROADCAST ||
		   skb->pkt_type == PACKET_MULTICAST) {
		struct in_device *in_dev = __in_dev_get_rcu(dev);

		/* RFC 1122 3.3.6:
		 *
		 *   When a host sends a datagram to a link-layer broadcast
		 *   address, the IP destination address MUST be a legal IP
		 *   broadcast or IP multicast address.
		 *
		 *   A host SHOULD silently discard a datagram that is received
		 *   via a link-layer broadcast (see Section 2.4) but does not
		 *   specify an IP multicast or broadcast destination address.
		 *
		 * This doesn't explicitly say L2 *broadcast*, but broadcast is
		 * in a way a form of multicast and the most common use case for
		 * this is 802.11 protecting against cross-station spoofing (the
		 * so-called "hole-196" attack) so do it for both.
		 */
		if (in_dev &&
		    IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST))
			goto drop;
	}

	return NET_RX_SUCCESS;

drop:
	kfree_skb(skb);
	return NET_RX_DROP;

drop_error:
	if (err == -EXDEV)
		__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
	goto drop;
}

static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	int ret;

	/* if ingress device is enslaved to an L3 master device pass the
	 * skb to its handler for processing
	 */
	skb = l3mdev_ip_rcv(skb);
	if (!skb)
		return NET_RX_SUCCESS;

	ret = ip_rcv_finish_core(net, sk, skb, dev, NULL);
	if (ret != NET_RX_DROP)
		ret = dst_input(skb);
	return ret;
}

/*
 *	Main IP Receive routine.
 */
static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
{
	const struct iphdr *iph;
	u32 len;

	/* When the interface is in promisc. mode, drop all the crap
	 * that it receives, do not try to analyse it.
	 */
	if (skb->pkt_type == PACKET_OTHERHOST)
		goto drop;

	__IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (!skb) {
		__IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
		goto out;
	}

	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto inhdr_error;

	iph = ip_hdr(skb);

	/*
	 *	RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum.
	 *
	 *	Is the datagram acceptable?
	 *
	 *	1.	Length at least the size of an ip header
	 *	2.	Version of 4
	 *	3.	Checksums correctly. [Speed optimisation for later, skip loopback checksums]
	 *	4.	Doesn't have a bogus length
	 */

	if (iph->ihl < 5 || iph->version != 4)
		goto inhdr_error;

	BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1);
	BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0);
	BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE);
	__IP_ADD_STATS(net,
		       IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK),
		       max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));

	if (!pskb_may_pull(skb, iph->ihl*4))
		goto inhdr_error;

	iph = ip_hdr(skb);

	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
		goto csum_error;

	len = ntohs(iph->tot_len);
	if (skb->len < len) {
		__IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
		goto drop;
	} else if (len < (iph->ihl*4))
		goto inhdr_error;

	/* Our transport medium may have padded the buffer out. Now we know it
	 * is IP we can trim to the true length of the frame.
	 * Note this now means skb->len holds ntohs(iph->tot_len).
	 */
	if (pskb_trim_rcsum(skb, len)) {
		__IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	iph = ip_hdr(skb);
	skb->transport_header = skb->network_header + iph->ihl*4;

	/* Remove any debris in the socket control block */
	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
	IPCB(skb)->iif = skb->skb_iif;

	/* Must drop socket now because of tproxy. */
	if (!skb_sk_is_prefetched(skb))
		skb_orphan(skb);

	return skb;

csum_error:
	__IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
inhdr_error:
	__IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
drop:
	kfree_skb(skb);
out:
	return NULL;
}

/*
 * IP receive entry point
 */
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
	   struct net_device *orig_dev)
{
	struct net *net = dev_net(dev);

	skb = ip_rcv_core(skb, net);
	if (skb == NULL)
		return NET_RX_DROP;

	return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
		       net, NULL, skb, dev, NULL,
		       ip_rcv_finish);
}

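/* Feed each skb of an already-routed sublist into its dst's input
 * handler.
 */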
static void ip_sublist_rcv_finish(struct list_head *head)
{
	struct sk_buff *skb, *next;

	list_for_each_entry_safe(skb, next, head, list) {
		skb_list_del_init(skb);
		dst_input(skb);
	}
}

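/* Decide whether this packet may serve as a route hint for the
 * packets that follow it in the batch.  No hint is produced when
 * custom FIB rules are active or the route is broadcast, since
 * plain daddr/tos matching is not sufficient in those cases.
 */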
static struct sk_buff *ip_extract_route_hint(const struct net *net,
					     struct sk_buff *skb, int rt_type)
{
	if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST)
		return NULL;

	return skb;
}

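/* Route every packet in the list, grouping consecutive packets that
 * resolved to the same dst into sublists.  The first packet of each
 * group may serve as a route hint for the packets behind it, and
 * each completed sublist is handed to ip_sublist_rcv_finish().
 */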
static void ip_list_rcv_finish(struct net *net, struct sock *sk,
			       struct list_head *head)
{
	struct sk_buff *skb, *next, *hint = NULL;
	struct dst_entry *curr_dst = NULL;
	struct list_head sublist;

	INIT_LIST_HEAD(&sublist);
	list_for_each_entry_safe(skb, next, head, list) {
		struct net_device *dev = skb->dev;
		struct dst_entry *dst;

		skb_list_del_init(skb);
		/* if ingress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip_rcv(skb);
		if (!skb)
			continue;
		if (ip_rcv_finish_core(net, sk, skb, dev, hint) == NET_RX_DROP)
			continue;

		dst = skb_dst(skb);
		if (curr_dst != dst) {
			hint = ip_extract_route_hint(net, skb,
					       ((struct rtable *)dst)->rt_type);

			/* dispatch old sublist */
			if (!list_empty(&sublist))
				ip_sublist_rcv_finish(&sublist);
			/* start new sublist */
			INIT_LIST_HEAD(&sublist);
			curr_dst = dst;
		}
		list_add_tail(&skb->list, &sublist);
	}
	/* dispatch final sublist */
	ip_sublist_rcv_finish(&sublist);
}

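/* Pass a sublist through the PRE_ROUTING netfilter hook, then route
 * and deliver whatever the hook let through.
 */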
static void ip_sublist_rcv(struct list_head *head, struct net_device *dev,
			   struct net *net)
{
	NF_HOOK_LIST(NFPROTO_IPV4, NF_INET_PRE_ROUTING, net, NULL,
		     head, dev, NULL, ip_rcv_finish);
	ip_list_rcv_finish(net, NULL, head);
}

/* Receive a list of IP packets */
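/* Packets are batched into sublists of consecutive skbs that share
 * the same originating device and netns; each sublist then enters
 * ip_sublist_rcv() as a unit.
 */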
void ip_list_rcv(struct list_head *head, struct packet_type *pt,
		 struct net_device *orig_dev)
{
	struct net_device *curr_dev = NULL;
	struct net *curr_net = NULL;
	struct sk_buff *skb, *next;
	struct list_head sublist;

	INIT_LIST_HEAD(&sublist);
	list_for_each_entry_safe(skb, next, head, list) {
		struct net_device *dev = skb->dev;
		struct net *net = dev_net(dev);

		skb_list_del_init(skb);
		skb = ip_rcv_core(skb, net);
		if (skb == NULL)
			continue;

		if (curr_dev != dev || curr_net != net) {
			/* dispatch old sublist */
			if (!list_empty(&sublist))
				ip_sublist_rcv(&sublist, curr_dev, curr_net);
			/* start new sublist */
			INIT_LIST_HEAD(&sublist);
			curr_dev = dev;
			curr_net = net;
		}
		list_add_tail(&skb->list, &sublist);
	}
	/* dispatch final sublist */
	if (!list_empty(&sublist))
		ip_sublist_rcv(&sublist, curr_dev, curr_net);
}