LCOV - code coverage report
Current view: top level - net/ipv4 - route.c
Test: landlock.info            Lines:     420 / 1655   25.4 %
Date: 2021-04-22 12:43:58      Functions:  32 /   93   34.4 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * INET         An implementation of the TCP/IP protocol suite for the LINUX
       4             :  *              operating system.  INET is implemented using the  BSD Socket
       5             :  *              interface as the means of communication with the user level.
       6             :  *
       7             :  *              ROUTE - implementation of the IP router.
       8             :  *
       9             :  * Authors:     Ross Biro
      10             :  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
      11             :  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
      12             :  *              Linus Torvalds, <Linus.Torvalds@helsinki.fi>
      13             :  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
      14             :  *
      15             :  * Fixes:
      16             :  *              Alan Cox        :       Verify area fixes.
      17             :  *              Alan Cox        :       cli() protects routing changes
      18             :  *              Rui Oliveira    :       ICMP routing table updates
      19             :  *              (rco@di.uminho.pt)      Routing table insertion and update
      20             :  *              Linus Torvalds  :       Rewrote bits to be sensible
      21             :  *              Alan Cox        :       Added BSD route gw semantics
      22             :  *              Alan Cox        :       Super /proc >4K
      23             :  *              Alan Cox        :       MTU in route table
      24             :  *              Alan Cox        :       MSS actually. Also added the window
      25             :  *                                      clamper.
      26             :  *              Sam Lantinga    :       Fixed route matching in rt_del()
      27             :  *              Alan Cox        :       Routing cache support.
      28             :  *              Alan Cox        :       Removed compatibility cruft.
      29             :  *              Alan Cox        :       RTF_REJECT support.
      30             :  *              Alan Cox        :       TCP irtt support.
      31             :  *              Jonathan Naylor :       Added Metric support.
      32             :  *      Miquel van Smoorenburg  :       BSD API fixes.
      33             :  *      Miquel van Smoorenburg  :       Metrics.
      34             :  *              Alan Cox        :       Use __u32 properly
      35             :  *              Alan Cox        :       Aligned routing errors more closely with BSD
      36             :  *                                      our system is still very different.
      37             :  *              Alan Cox        :       Faster /proc handling
      38             :  *      Alexey Kuznetsov        :       Massive rework to support tree based routing,
      39             :  *                                      routing caches and better behaviour.
      40             :  *
      41             :  *              Olaf Erb        :       irtt wasn't being copied right.
      42             :  *              Bjorn Ekwall    :       Kerneld route support.
      43             :  *              Alan Cox        :       Multicast fixed (I hope)
      44             :  *              Pavel Krauz     :       Limited broadcast fixed
      45             :  *              Mike McLagan    :       Routing by source
      46             :  *      Alexey Kuznetsov        :       End of old history. Split to fib.c and
      47             :  *                                      route.c and rewritten from scratch.
      48             :  *              Andi Kleen      :       Load-limit warning messages.
      49             :  *      Vitaly E. Lavrov        :       Transparent proxy revived after year coma.
      50             :  *      Vitaly E. Lavrov        :       Race condition in ip_route_input_slow.
      51             :  *      Tobias Ringstrom        :       Uninitialized res.type in ip_route_output_slow.
      52             :  *      Vladimir V. Ivanov      :       IP rule info (flowid) is really useful.
      53             :  *              Marc Boucher    :       routing by fwmark
      54             :  *      Robert Olsson           :       Added rt_cache statistics
      55             :  *      Arnaldo C. Melo         :       Convert proc stuff to seq_file
      56             :  *      Eric Dumazet            :       hashed spinlocks and rt_check_expire() fixes.
      57             :  *      Ilia Sotnikov           :       Ignore TOS on PMTUD and Redirect
      58             :  *      Ilia Sotnikov           :       Removed TOS from hash calculations
      59             :  */
      60             : 
      61             : #define pr_fmt(fmt) "IPv4: " fmt
      62             : 
      63             : #include <linux/module.h>
      64             : #include <linux/uaccess.h>
      65             : #include <linux/bitops.h>
      66             : #include <linux/types.h>
      67             : #include <linux/kernel.h>
      68             : #include <linux/mm.h>
      69             : #include <linux/string.h>
      70             : #include <linux/socket.h>
      71             : #include <linux/sockios.h>
      72             : #include <linux/errno.h>
      73             : #include <linux/in.h>
      74             : #include <linux/inet.h>
      75             : #include <linux/netdevice.h>
      76             : #include <linux/proc_fs.h>
      77             : #include <linux/init.h>
      78             : #include <linux/skbuff.h>
      79             : #include <linux/inetdevice.h>
      80             : #include <linux/igmp.h>
      81             : #include <linux/pkt_sched.h>
      82             : #include <linux/mroute.h>
      83             : #include <linux/netfilter_ipv4.h>
      84             : #include <linux/random.h>
      85             : #include <linux/rcupdate.h>
      86             : #include <linux/times.h>
      87             : #include <linux/slab.h>
      88             : #include <linux/jhash.h>
      89             : #include <net/dst.h>
      90             : #include <net/dst_metadata.h>
      91             : #include <net/net_namespace.h>
      92             : #include <net/protocol.h>
      93             : #include <net/ip.h>
      94             : #include <net/route.h>
      95             : #include <net/inetpeer.h>
      96             : #include <net/sock.h>
      97             : #include <net/ip_fib.h>
      98             : #include <net/nexthop.h>
      99             : #include <net/arp.h>
     100             : #include <net/tcp.h>
     101             : #include <net/icmp.h>
     102             : #include <net/xfrm.h>
     103             : #include <net/lwtunnel.h>
     104             : #include <net/netevent.h>
     105             : #include <net/rtnetlink.h>
     106             : #ifdef CONFIG_SYSCTL
     107             : #include <linux/sysctl.h>
     108             : #endif
     109             : #include <net/secure_seq.h>
     110             : #include <net/ip_tunnels.h>
     111             : #include <net/l3mdev.h>
     112             : 
     113             : #include "fib_lookup.h"
     114             : 
     115             : #define RT_FL_TOS(oldflp4) \
     116             :         ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
     117             : 
     118             : #define RT_GC_TIMEOUT (300*HZ)
     119             : 
     120             : static int ip_rt_max_size;
     121             : static int ip_rt_redirect_number __read_mostly  = 9;
     122             : static int ip_rt_redirect_load __read_mostly    = HZ / 50;
     123             : static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
     124             : static int ip_rt_error_cost __read_mostly       = HZ;
     125             : static int ip_rt_error_burst __read_mostly      = 5 * HZ;
     126             : static int ip_rt_mtu_expires __read_mostly      = 10 * 60 * HZ;
     127             : static u32 ip_rt_min_pmtu __read_mostly         = 512 + 20 + 20;
     128             : static int ip_rt_min_advmss __read_mostly       = 256;
     129             : 
     130             : static int ip_rt_gc_timeout __read_mostly       = RT_GC_TIMEOUT;
     131             : 
     132             : /*
     133             :  *      Interface to generic destination cache.
     134             :  */
     135             : 
     136             : INDIRECT_CALLABLE_SCOPE
     137             : struct dst_entry        *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
     138             : static unsigned int      ipv4_default_advmss(const struct dst_entry *dst);
     139             : INDIRECT_CALLABLE_SCOPE
     140             : unsigned int            ipv4_mtu(const struct dst_entry *dst);
     141             : static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
     142             : static void              ipv4_link_failure(struct sk_buff *skb);
     143             : static void              ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
     144             :                                            struct sk_buff *skb, u32 mtu,
     145             :                                            bool confirm_neigh);
     146             : static void              ip_do_redirect(struct dst_entry *dst, struct sock *sk,
     147             :                                         struct sk_buff *skb);
     148             : static void             ipv4_dst_destroy(struct dst_entry *dst);
     149             : 
     150           0 : static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
     151             : {
     152           0 :         WARN_ON(1);
     153           0 :         return NULL;
     154             : }
     155             : 
     156             : static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
     157             :                                            struct sk_buff *skb,
     158             :                                            const void *daddr);
     159             : static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr);
     160             : 
     161             : static struct dst_ops ipv4_dst_ops = {
     162             :         .family =               AF_INET,
     163             :         .check =                ipv4_dst_check,
     164             :         .default_advmss =       ipv4_default_advmss,
     165             :         .mtu =                  ipv4_mtu,
     166             :         .cow_metrics =          ipv4_cow_metrics,
     167             :         .destroy =              ipv4_dst_destroy,
     168             :         .negative_advice =      ipv4_negative_advice,
     169             :         .link_failure =         ipv4_link_failure,
     170             :         .update_pmtu =          ip_rt_update_pmtu,
     171             :         .redirect =             ip_do_redirect,
     172             :         .local_out =            __ip_local_out,
     173             :         .neigh_lookup =         ipv4_neigh_lookup,
     174             :         .confirm_neigh =        ipv4_confirm_neigh,
     175             : };
     176             : 
     177             : #define ECN_OR_COST(class)      TC_PRIO_##class
     178             : 
     179             : const __u8 ip_tos2prio[16] = {
     180             :         TC_PRIO_BESTEFFORT,
     181             :         ECN_OR_COST(BESTEFFORT),
     182             :         TC_PRIO_BESTEFFORT,
     183             :         ECN_OR_COST(BESTEFFORT),
     184             :         TC_PRIO_BULK,
     185             :         ECN_OR_COST(BULK),
     186             :         TC_PRIO_BULK,
     187             :         ECN_OR_COST(BULK),
     188             :         TC_PRIO_INTERACTIVE,
     189             :         ECN_OR_COST(INTERACTIVE),
     190             :         TC_PRIO_INTERACTIVE,
     191             :         ECN_OR_COST(INTERACTIVE),
     192             :         TC_PRIO_INTERACTIVE_BULK,
     193             :         ECN_OR_COST(INTERACTIVE_BULK),
     194             :         TC_PRIO_INTERACTIVE_BULK,
     195             :         ECN_OR_COST(INTERACTIVE_BULK)
     196             : };
     197             : EXPORT_SYMBOL(ip_tos2prio);
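
/* A minimal sketch (not part of route.c; the helper name below is invented
 * for illustration) of how the ip_tos2prio[] table above is indexed: the
 * four TOS bits of the IPv4 TOS byte are masked and shifted right by one,
 * giving an index 0..15.  The kernel's own helper for this lookup is
 * rt_tos2priority() in include/net/route.h.
 */
static inline char example_tos2priority(u8 tos)
{
	/* IPTOS_TOS(tos) keeps bits 1..4, so the index stays within 0..15 */
	return ip_tos2prio[IPTOS_TOS(tos) >> 1];
}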
     198             : 
     199             : static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
     200             : #define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)
     201             : 
     202             : #ifdef CONFIG_PROC_FS
     203           0 : static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
     204             : {
     205           0 :         if (*pos)
     206           0 :                 return NULL;
     207             :         return SEQ_START_TOKEN;
     208             : }
     209             : 
     210           0 : static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
     211             : {
     212           0 :         ++*pos;
     213           0 :         return NULL;
     214             : }
     215             : 
     216           0 : static void rt_cache_seq_stop(struct seq_file *seq, void *v)
     217             : {
     218           0 : }
     219             : 
     220           0 : static int rt_cache_seq_show(struct seq_file *seq, void *v)
     221             : {
     222           0 :         if (v == SEQ_START_TOKEN)
     223           0 :                 seq_printf(seq, "%-127s\n",
     224             :                            "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
     225             :                            "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
     226             :                            "HHUptod\tSpecDst");
     227           0 :         return 0;
     228             : }
     229             : 
     230             : static const struct seq_operations rt_cache_seq_ops = {
     231             :         .start  = rt_cache_seq_start,
     232             :         .next   = rt_cache_seq_next,
     233             :         .stop   = rt_cache_seq_stop,
     234             :         .show   = rt_cache_seq_show,
     235             : };
     236             : 
     237           0 : static int rt_cache_seq_open(struct inode *inode, struct file *file)
     238             : {
     239           0 :         return seq_open(file, &rt_cache_seq_ops);
     240             : }
     241             : 
     242             : static const struct proc_ops rt_cache_proc_ops = {
     243             :         .proc_open      = rt_cache_seq_open,
     244             :         .proc_read      = seq_read,
     245             :         .proc_lseek     = seq_lseek,
     246             :         .proc_release   = seq_release,
     247             : };
     248             : 
     249             : 
     250           0 : static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
     251             : {
     252           0 :         int cpu;
     253             : 
     254           0 :         if (*pos == 0)
     255             :                 return SEQ_START_TOKEN;
     256             : 
     257           0 :         for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
     258           0 :                 if (!cpu_possible(cpu))
     259           0 :                         continue;
     260           0 :                 *pos = cpu+1;
     261           0 :                 return &per_cpu(rt_cache_stat, cpu);
     262             :         }
     263             :         return NULL;
     264             : }
     265             : 
     266           0 : static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
     267             : {
     268           0 :         int cpu;
     269             : 
     270           0 :         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
     271           0 :                 if (!cpu_possible(cpu))
     272           0 :                         continue;
     273           0 :                 *pos = cpu+1;
     274           0 :                 return &per_cpu(rt_cache_stat, cpu);
     275             :         }
     276           0 :         (*pos)++;
     277           0 :         return NULL;
     278             : 
     279             : }
     280             : 
     281           0 : static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
     282             : {
     283             : 
     284           0 : }
     285             : 
     286           0 : static int rt_cpu_seq_show(struct seq_file *seq, void *v)
     287             : {
     288           0 :         struct rt_cache_stat *st = v;
     289             : 
     290           0 :         if (v == SEQ_START_TOKEN) {
     291           0 :                 seq_printf(seq, "entries  in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src  out_hit out_slow_tot out_slow_mc  gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
     292           0 :                 return 0;
     293             :         }
     294             : 
     295           0 :         seq_printf(seq,"%08x  %08x %08x %08x %08x %08x %08x %08x "
     296             :                    " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
     297             :                    dst_entries_get_slow(&ipv4_dst_ops),
     298             :                    0, /* st->in_hit */
     299             :                    st->in_slow_tot,
     300             :                    st->in_slow_mc,
     301             :                    st->in_no_route,
     302             :                    st->in_brd,
     303             :                    st->in_martian_dst,
     304             :                    st->in_martian_src,
     305             : 
     306             :                    0, /* st->out_hit */
     307             :                    st->out_slow_tot,
     308             :                    st->out_slow_mc,
     309             : 
     310             :                    0, /* st->gc_total */
     311             :                    0, /* st->gc_ignored */
     312             :                    0, /* st->gc_goal_miss */
     313             :                    0, /* st->gc_dst_overflow */
     314             :                    0, /* st->in_hlist_search */
     315             :                    0  /* st->out_hlist_search */
     316             :                 );
     317           0 :         return 0;
     318             : }
     319             : 
     320             : static const struct seq_operations rt_cpu_seq_ops = {
     321             :         .start  = rt_cpu_seq_start,
     322             :         .next   = rt_cpu_seq_next,
     323             :         .stop   = rt_cpu_seq_stop,
     324             :         .show   = rt_cpu_seq_show,
     325             : };
     326             : 
     327             : 
     328           0 : static int rt_cpu_seq_open(struct inode *inode, struct file *file)
     329             : {
     330           0 :         return seq_open(file, &rt_cpu_seq_ops);
     331             : }
     332             : 
     333             : static const struct proc_ops rt_cpu_proc_ops = {
     334             :         .proc_open      = rt_cpu_seq_open,
     335             :         .proc_read      = seq_read,
     336             :         .proc_lseek     = seq_lseek,
     337             :         .proc_release   = seq_release,
     338             : };
     339             : 
     340             : #ifdef CONFIG_IP_ROUTE_CLASSID
     341             : static int rt_acct_proc_show(struct seq_file *m, void *v)
     342             : {
     343             :         struct ip_rt_acct *dst, *src;
     344             :         unsigned int i, j;
     345             : 
     346             :         dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
     347             :         if (!dst)
     348             :                 return -ENOMEM;
     349             : 
     350             :         for_each_possible_cpu(i) {
     351             :                 src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
     352             :                 for (j = 0; j < 256; j++) {
     353             :                         dst[j].o_bytes   += src[j].o_bytes;
     354             :                         dst[j].o_packets += src[j].o_packets;
     355             :                         dst[j].i_bytes   += src[j].i_bytes;
     356             :                         dst[j].i_packets += src[j].i_packets;
     357             :                 }
     358             :         }
     359             : 
     360             :         seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
     361             :         kfree(dst);
     362             :         return 0;
     363             : }
     364             : #endif
     365             : 
     366           1 : static int __net_init ip_rt_do_proc_init(struct net *net)
     367             : {
     368           1 :         struct proc_dir_entry *pde;
     369             : 
     370           1 :         pde = proc_create("rt_cache", 0444, net->proc_net,
     371             :                           &rt_cache_proc_ops);
     372           1 :         if (!pde)
     373           0 :                 goto err1;
     374             : 
     375           1 :         pde = proc_create("rt_cache", 0444,
     376             :                           net->proc_net_stat, &rt_cpu_proc_ops);
     377           1 :         if (!pde)
     378           0 :                 goto err2;
     379             : 
     380             : #ifdef CONFIG_IP_ROUTE_CLASSID
     381             :         pde = proc_create_single("rt_acct", 0, net->proc_net,
     382             :                         rt_acct_proc_show);
     383             :         if (!pde)
     384             :                 goto err3;
     385             : #endif
     386             :         return 0;
     387             : 
     388             : #ifdef CONFIG_IP_ROUTE_CLASSID
     389             : err3:
     390             :         remove_proc_entry("rt_cache", net->proc_net_stat);
     391             : #endif
     392           0 : err2:
     393           0 :         remove_proc_entry("rt_cache", net->proc_net);
     394             : err1:
     395             :         return -ENOMEM;
     396             : }
     397             : 
     398           0 : static void __net_exit ip_rt_do_proc_exit(struct net *net)
     399             : {
     400           0 :         remove_proc_entry("rt_cache", net->proc_net_stat);
     401           0 :         remove_proc_entry("rt_cache", net->proc_net);
     402             : #ifdef CONFIG_IP_ROUTE_CLASSID
     403             :         remove_proc_entry("rt_acct", net->proc_net);
     404             : #endif
     405           0 : }
     406             : 
     407             : static struct pernet_operations ip_rt_proc_ops __net_initdata =  {
     408             :         .init = ip_rt_do_proc_init,
     409             :         .exit = ip_rt_do_proc_exit,
     410             : };
     411             : 
     412           1 : static int __init ip_rt_proc_init(void)
     413             : {
     414           1 :         return register_pernet_subsys(&ip_rt_proc_ops);
     415             : }
     416             : 
     417             : #else
     418             : static inline int ip_rt_proc_init(void)
     419             : {
     420             :         return 0;
     421             : }
     422             : #endif /* CONFIG_PROC_FS */
     423             : 
     424        1292 : static inline bool rt_is_expired(const struct rtable *rth)
     425             : {
     426        1292 :         return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
     427             : }
     428             : 
     429          16 : void rt_cache_flush(struct net *net)
     430             : {
     431          16 :         rt_genid_bump_ipv4(net);
     432          14 : }
     433             : 
     434           2 : static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
     435             :                                            struct sk_buff *skb,
     436             :                                            const void *daddr)
     437             : {
     438           2 :         const struct rtable *rt = container_of(dst, struct rtable, dst);
     439           2 :         struct net_device *dev = dst->dev;
     440           2 :         struct neighbour *n;
     441             : 
     442           2 :         rcu_read_lock_bh();
     443             : 
     444           2 :         if (likely(rt->rt_gw_family == AF_INET)) {
     445           0 :                 n = ip_neigh_gw4(dev, rt->rt_gw4);
     446           2 :         } else if (rt->rt_gw_family == AF_INET6) {
     447           0 :                 n = ip_neigh_gw6(dev, &rt->rt_gw6);
     448             :         } else {
     449           2 :                 __be32 pkey;
     450             : 
     451           2 :                 pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr);
     452           2 :                 n = ip_neigh_gw4(dev, pkey);
     453             :         }
     454             : 
     455           2 :         if (!IS_ERR(n) && !refcount_inc_not_zero(&n->refcnt))
     456           0 :                 n = NULL;
     457             : 
     458           2 :         rcu_read_unlock_bh();
     459             : 
     460           2 :         return n;
     461             : }
     462             : 
     463           0 : static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
     464             : {
     465           0 :         const struct rtable *rt = container_of(dst, struct rtable, dst);
     466           0 :         struct net_device *dev = dst->dev;
     467           0 :         const __be32 *pkey = daddr;
     468             : 
     469           0 :         if (rt->rt_gw_family == AF_INET) {
     470           0 :                 pkey = (const __be32 *)&rt->rt_gw4;
     471           0 :         } else if (rt->rt_gw_family == AF_INET6) {
     472           0 :                 return __ipv6_confirm_neigh_stub(dev, &rt->rt_gw6);
     473           0 :         } else if (!daddr ||
     474           0 :                  (rt->rt_flags &
     475             :                   (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) {
     476             :                 return;
     477             :         }
     478           0 :         __ipv4_confirm_neigh(dev, *(__force u32 *)pkey);
     479             : }
     480             : 
     481             : #define IP_IDENTS_SZ 2048u
     482             : 
     483             : static atomic_t *ip_idents __read_mostly;
     484             : static u32 *ip_tstamps __read_mostly;
     485             : 
     486             : /* In order to protect privacy, we add a perturbation to identifiers
      487             :  * if one generator is seldom used. This makes it hard for an attacker
     488             :  * to infer how many packets were sent between two points in time.
     489             :  */
     490          14 : u32 ip_idents_reserve(u32 hash, int segs)
     491             : {
     492          14 :         u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
     493          14 :         atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
     494          14 :         u32 old = READ_ONCE(*p_tstamp);
     495          14 :         u32 now = (u32)jiffies;
     496          14 :         u32 delta = 0;
     497             : 
     498          14 :         if (old != now && cmpxchg(p_tstamp, old, now) == old)
     499          14 :                 delta = prandom_u32_max(now - old);
     500             : 
     501             :         /* If UBSAN reports an error there, please make sure your compiler
      502             :          * supports -fno-strict-overflow before reporting it; that was a bug
     503             :          * in UBSAN, and it has been fixed in GCC-8.
     504             :          */
     505          14 :         return atomic_add_return(segs + delta, p_id) - segs;
     506             : }
     507             : EXPORT_SYMBOL(ip_idents_reserve);
     508             : 
     509          14 : void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
     510             : {
     511          14 :         u32 hash, id;
     512             : 
     513             :         /* Note the following code is not safe, but this is okay. */
     514          14 :         if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
     515           1 :                 get_random_bytes(&net->ipv4.ip_id_key,
     516             :                                  sizeof(net->ipv4.ip_id_key));
     517             : 
     518          28 :         hash = siphash_3u32((__force u32)iph->daddr,
     519          14 :                             (__force u32)iph->saddr,
     520          14 :                             iph->protocol,
     521          14 :                             &net->ipv4.ip_id_key);
     522          14 :         id = ip_idents_reserve(hash, segs);
     523          14 :         iph->id = htons(id);
     524          14 : }
     525             : EXPORT_SYMBOL(__ip_select_ident);
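
/* A self-contained sketch (not kernel code, all names invented) of the
 * perturbation idea implemented by ip_idents_reserve() above: when a
 * counter bucket has been idle, skip it forward by a random amount bounded
 * by the idle time, so an observer cannot infer how many packets were sent
 * between two probes.
 */
static u32 example_idents_reserve(u32 *counter, u32 *last_used, u32 now,
				  int segs, u32 rnd)
{
	u32 delta = 0;

	if (*last_used != now) {
		/* bucket was idle for (now - *last_used) ticks: add a bounded random skip */
		delta = rnd % (now - *last_used);
		*last_used = now;
	}
	*counter += segs + delta;
	return *counter - segs;		/* first identifier of the reserved range */
}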
     526             : 
     527           0 : static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
     528             :                              const struct sock *sk,
     529             :                              const struct iphdr *iph,
     530             :                              int oif, u8 tos,
     531             :                              u8 prot, u32 mark, int flow_flags)
     532             : {
     533           0 :         if (sk) {
     534           0 :                 const struct inet_sock *inet = inet_sk(sk);
     535             : 
     536           0 :                 oif = sk->sk_bound_dev_if;
     537           0 :                 mark = sk->sk_mark;
     538           0 :                 tos = RT_CONN_FLAGS(sk);
     539           0 :                 prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
     540             :         }
     541           0 :         flowi4_init_output(fl4, oif, mark, tos,
     542             :                            RT_SCOPE_UNIVERSE, prot,
     543             :                            flow_flags,
     544             :                            iph->daddr, iph->saddr, 0, 0,
     545             :                            sock_net_uid(net, sk));
     546           0 : }
     547             : 
     548           0 : static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
     549             :                                const struct sock *sk)
     550             : {
     551           0 :         const struct net *net = dev_net(skb->dev);
     552           0 :         const struct iphdr *iph = ip_hdr(skb);
     553           0 :         int oif = skb->dev->ifindex;
     554           0 :         u8 tos = RT_TOS(iph->tos);
     555           0 :         u8 prot = iph->protocol;
     556           0 :         u32 mark = skb->mark;
     557             : 
     558           0 :         __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
     559           0 : }
     560             : 
     561           0 : static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
     562             : {
     563           0 :         const struct inet_sock *inet = inet_sk(sk);
     564           0 :         const struct ip_options_rcu *inet_opt;
     565           0 :         __be32 daddr = inet->inet_daddr;
     566             : 
     567           0 :         rcu_read_lock();
     568           0 :         inet_opt = rcu_dereference(inet->inet_opt);
     569           0 :         if (inet_opt && inet_opt->opt.srr)
     570           0 :                 daddr = inet_opt->opt.faddr;
     571           0 :         flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
     572           0 :                            RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
     573           0 :                            inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
     574           0 :                            inet_sk_flowi_flags(sk),
     575             :                            daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
     576           0 :         rcu_read_unlock();
     577           0 : }
     578             : 
     579           0 : static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
     580             :                                  const struct sk_buff *skb)
     581             : {
     582           0 :         if (skb)
     583           0 :                 build_skb_flow_key(fl4, skb, sk);
     584             :         else
     585           0 :                 build_sk_flow_key(fl4, sk);
     586           0 : }
     587             : 
     588             : static DEFINE_SPINLOCK(fnhe_lock);
     589             : 
     590           0 : static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
     591             : {
     592           0 :         struct rtable *rt;
     593             : 
     594           0 :         rt = rcu_dereference(fnhe->fnhe_rth_input);
     595           0 :         if (rt) {
     596           0 :                 RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL);
     597           0 :                 dst_dev_put(&rt->dst);
     598           0 :                 dst_release(&rt->dst);
     599             :         }
     600           0 :         rt = rcu_dereference(fnhe->fnhe_rth_output);
     601           0 :         if (rt) {
     602           0 :                 RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL);
     603           0 :                 dst_dev_put(&rt->dst);
     604           0 :                 dst_release(&rt->dst);
     605             :         }
     606           0 : }
     607             : 
     608           0 : static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
     609             : {
     610           0 :         struct fib_nh_exception *fnhe, *oldest;
     611             : 
     612           0 :         oldest = rcu_dereference(hash->chain);
     613           0 :         for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
     614           0 :              fnhe = rcu_dereference(fnhe->fnhe_next)) {
     615           0 :                 if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
     616           0 :                         oldest = fnhe;
     617             :         }
     618           0 :         fnhe_flush_routes(oldest);
     619           0 :         return oldest;
     620             : }
     621             : 
     622           0 : static inline u32 fnhe_hashfun(__be32 daddr)
     623             : {
     624           0 :         static u32 fnhe_hashrnd __read_mostly;
     625           0 :         u32 hval;
     626             : 
     627           0 :         net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd));
     628           0 :         hval = jhash_1word((__force u32)daddr, fnhe_hashrnd);
     629           0 :         return hash_32(hval, FNHE_HASH_SHIFT);
     630             : }
     631             : 
     632           0 : static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
     633             : {
     634           0 :         rt->rt_pmtu = fnhe->fnhe_pmtu;
     635           0 :         rt->rt_mtu_locked = fnhe->fnhe_mtu_locked;
     636           0 :         rt->dst.expires = fnhe->fnhe_expires;
     637             : 
     638           0 :         if (fnhe->fnhe_gw) {
     639           0 :                 rt->rt_flags |= RTCF_REDIRECTED;
     640           0 :                 rt->rt_uses_gateway = 1;
     641           0 :                 rt->rt_gw_family = AF_INET;
     642           0 :                 rt->rt_gw4 = fnhe->fnhe_gw;
     643             :         }
     644           0 : }
     645             : 
     646           0 : static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
     647             :                                   __be32 gw, u32 pmtu, bool lock,
     648             :                                   unsigned long expires)
     649             : {
     650           0 :         struct fnhe_hash_bucket *hash;
     651           0 :         struct fib_nh_exception *fnhe;
     652           0 :         struct rtable *rt;
     653           0 :         u32 genid, hval;
     654           0 :         unsigned int i;
     655           0 :         int depth;
     656             : 
     657           0 :         genid = fnhe_genid(dev_net(nhc->nhc_dev));
     658           0 :         hval = fnhe_hashfun(daddr);
     659             : 
     660           0 :         spin_lock_bh(&fnhe_lock);
     661             : 
     662           0 :         hash = rcu_dereference(nhc->nhc_exceptions);
     663           0 :         if (!hash) {
     664           0 :                 hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC);
     665           0 :                 if (!hash)
     666           0 :                         goto out_unlock;
     667           0 :                 rcu_assign_pointer(nhc->nhc_exceptions, hash);
     668             :         }
     669             : 
     670           0 :         hash += hval;
     671             : 
     672           0 :         depth = 0;
     673           0 :         for (fnhe = rcu_dereference(hash->chain); fnhe;
     674           0 :              fnhe = rcu_dereference(fnhe->fnhe_next)) {
     675           0 :                 if (fnhe->fnhe_daddr == daddr)
     676             :                         break;
     677           0 :                 depth++;
     678             :         }
     679             : 
     680           0 :         if (fnhe) {
     681           0 :                 if (fnhe->fnhe_genid != genid)
     682           0 :                         fnhe->fnhe_genid = genid;
     683           0 :                 if (gw)
     684           0 :                         fnhe->fnhe_gw = gw;
     685           0 :                 if (pmtu) {
     686           0 :                         fnhe->fnhe_pmtu = pmtu;
     687           0 :                         fnhe->fnhe_mtu_locked = lock;
     688             :                 }
     689           0 :                 fnhe->fnhe_expires = max(1UL, expires);
     690             :                 /* Update all cached dsts too */
     691           0 :                 rt = rcu_dereference(fnhe->fnhe_rth_input);
     692           0 :                 if (rt)
     693           0 :                         fill_route_from_fnhe(rt, fnhe);
     694           0 :                 rt = rcu_dereference(fnhe->fnhe_rth_output);
     695           0 :                 if (rt)
     696           0 :                         fill_route_from_fnhe(rt, fnhe);
     697             :         } else {
     698           0 :                 if (depth > FNHE_RECLAIM_DEPTH)
     699           0 :                         fnhe = fnhe_oldest(hash);
     700             :                 else {
     701           0 :                         fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
     702           0 :                         if (!fnhe)
     703           0 :                                 goto out_unlock;
     704             : 
     705           0 :                         fnhe->fnhe_next = hash->chain;
     706           0 :                         rcu_assign_pointer(hash->chain, fnhe);
     707             :                 }
     708           0 :                 fnhe->fnhe_genid = genid;
     709           0 :                 fnhe->fnhe_daddr = daddr;
     710           0 :                 fnhe->fnhe_gw = gw;
     711           0 :                 fnhe->fnhe_pmtu = pmtu;
     712           0 :                 fnhe->fnhe_mtu_locked = lock;
     713           0 :                 fnhe->fnhe_expires = max(1UL, expires);
     714             : 
     715             :                 /* Exception created; mark the cached routes for the nexthop
     716             :                  * stale, so anyone caching it rechecks if this exception
     717             :                  * applies to them.
     718             :                  */
     719           0 :                 rt = rcu_dereference(nhc->nhc_rth_input);
     720           0 :                 if (rt)
     721           0 :                         rt->dst.obsolete = DST_OBSOLETE_KILL;
     722             : 
     723           0 :                 for_each_possible_cpu(i) {
     724           0 :                         struct rtable __rcu **prt;
     725           0 :                         prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i);
     726           0 :                         rt = rcu_dereference(*prt);
     727           0 :                         if (rt)
     728           0 :                                 rt->dst.obsolete = DST_OBSOLETE_KILL;
     729             :                 }
     730             :         }
     731             : 
     732           0 :         fnhe->fnhe_stamp = jiffies;
     733             : 
     734           0 : out_unlock:
     735           0 :         spin_unlock_bh(&fnhe_lock);
     736           0 : }
     737             : 
     738           0 : static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
     739             :                              bool kill_route)
     740             : {
     741           0 :         __be32 new_gw = icmp_hdr(skb)->un.gateway;
     742           0 :         __be32 old_gw = ip_hdr(skb)->saddr;
     743           0 :         struct net_device *dev = skb->dev;
     744           0 :         struct in_device *in_dev;
     745           0 :         struct fib_result res;
     746           0 :         struct neighbour *n;
     747           0 :         struct net *net;
     748             : 
     749           0 :         switch (icmp_hdr(skb)->code & 7) {
     750             :         case ICMP_REDIR_NET:
     751             :         case ICMP_REDIR_NETTOS:
     752             :         case ICMP_REDIR_HOST:
     753             :         case ICMP_REDIR_HOSTTOS:
     754           0 :                 break;
     755             : 
     756             :         default:
     757           0 :                 return;
     758             :         }
     759             : 
     760           0 :         if (rt->rt_gw_family != AF_INET || rt->rt_gw4 != old_gw)
     761             :                 return;
     762             : 
     763           0 :         in_dev = __in_dev_get_rcu(dev);
     764           0 :         if (!in_dev)
     765             :                 return;
     766             : 
     767           0 :         net = dev_net(dev);
     768           0 :         if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
     769           0 :             ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
     770           0 :             ipv4_is_zeronet(new_gw))
     771           0 :                 goto reject_redirect;
     772             : 
     773           0 :         if (!IN_DEV_SHARED_MEDIA(in_dev)) {
     774           0 :                 if (!inet_addr_onlink(in_dev, new_gw, old_gw))
     775           0 :                         goto reject_redirect;
     776           0 :                 if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
     777           0 :                         goto reject_redirect;
     778             :         } else {
     779           0 :                 if (inet_addr_type(net, new_gw) != RTN_UNICAST)
     780           0 :                         goto reject_redirect;
     781             :         }
     782             : 
     783           0 :         n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
     784           0 :         if (!n)
     785           0 :                 n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
     786           0 :         if (!IS_ERR(n)) {
     787           0 :                 if (!(n->nud_state & NUD_VALID)) {
     788           0 :                         neigh_event_send(n, NULL);
     789             :                 } else {
     790           0 :                         if (fib_lookup(net, fl4, &res, 0) == 0) {
     791           0 :                                 struct fib_nh_common *nhc;
     792             : 
     793           0 :                                 fib_select_path(net, &res, fl4, skb);
     794           0 :                                 nhc = FIB_RES_NHC(res);
     795           0 :                                 update_or_create_fnhe(nhc, fl4->daddr, new_gw,
     796             :                                                 0, false,
     797             :                                                 jiffies + ip_rt_gc_timeout);
     798             :                         }
     799           0 :                         if (kill_route)
     800           0 :                                 rt->dst.obsolete = DST_OBSOLETE_KILL;
     801           0 :                         call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
     802             :                 }
     803           0 :                 neigh_release(n);
     804             :         }
     805             :         return;
     806             : 
     807           0 : reject_redirect:
     808             : #ifdef CONFIG_IP_ROUTE_VERBOSE
     809             :         if (IN_DEV_LOG_MARTIANS(in_dev)) {
     810             :                 const struct iphdr *iph = (const struct iphdr *) skb->data;
     811             :                 __be32 daddr = iph->daddr;
     812             :                 __be32 saddr = iph->saddr;
     813             : 
     814             :                 net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
     815             :                                      "  Advised path = %pI4 -> %pI4\n",
     816             :                                      &old_gw, dev->name, &new_gw,
     817             :                                      &saddr, &daddr);
     818             :         }
     819             : #endif
     820           0 :         ;
     821             : }
     822             : 
     823           0 : static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
     824             : {
     825           0 :         struct rtable *rt;
     826           0 :         struct flowi4 fl4;
     827           0 :         const struct iphdr *iph = (const struct iphdr *) skb->data;
     828           0 :         struct net *net = dev_net(skb->dev);
     829           0 :         int oif = skb->dev->ifindex;
     830           0 :         u8 tos = RT_TOS(iph->tos);
     831           0 :         u8 prot = iph->protocol;
     832           0 :         u32 mark = skb->mark;
     833             : 
     834           0 :         rt = (struct rtable *) dst;
     835             : 
     836           0 :         __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
     837           0 :         __ip_do_redirect(rt, skb, &fl4, true);
     838           0 : }
     839             : 
     840           0 : static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
     841             : {
     842           0 :         struct rtable *rt = (struct rtable *)dst;
     843           0 :         struct dst_entry *ret = dst;
     844             : 
     845           0 :         if (rt) {
     846           0 :                 if (dst->obsolete > 0) {
     847           0 :                         ip_rt_put(rt);
     848           0 :                         ret = NULL;
     849           0 :                 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
     850           0 :                            rt->dst.expires) {
     851           0 :                         ip_rt_put(rt);
     852           0 :                         ret = NULL;
     853             :                 }
     854             :         }
     855           0 :         return ret;
     856             : }
     857             : 
     858             : /*
     859             :  * Algorithm:
     860             :  *      1. The first ip_rt_redirect_number redirects are sent
     861             :  *         with exponential backoff, then we stop sending them at all,
     862             :  *         assuming that the host ignores our redirects.
     863             :  *      2. If we did not see packets requiring redirects
     864             :  *         during ip_rt_redirect_silence, we assume that the host
     865             :  *         forgot redirected route and start to send redirects again.
     866             :  *
     867             :  * This algorithm is much cheaper and more intelligent than dumb load limiting
     868             :  * in icmp.c.
     869             :  *
     870             :  * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
     871             :  * and "frag. need" (breaks PMTU discovery) in icmp.c.
     872             :  */
     873             : 
     874           0 : void ip_rt_send_redirect(struct sk_buff *skb)
     875             : {
     876           0 :         struct rtable *rt = skb_rtable(skb);
     877           0 :         struct in_device *in_dev;
     878           0 :         struct inet_peer *peer;
     879           0 :         struct net *net;
     880           0 :         int log_martians;
     881           0 :         int vif;
     882             : 
     883           0 :         rcu_read_lock();
     884           0 :         in_dev = __in_dev_get_rcu(rt->dst.dev);
     885           0 :         if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
     886           0 :                 rcu_read_unlock();
     887           0 :                 return;
     888             :         }
     889           0 :         log_martians = IN_DEV_LOG_MARTIANS(in_dev);
     890           0 :         vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
     891           0 :         rcu_read_unlock();
     892             : 
     893           0 :         net = dev_net(rt->dst.dev);
     894           0 :         peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1);
     895           0 :         if (!peer) {
     896           0 :                 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
     897           0 :                           rt_nexthop(rt, ip_hdr(skb)->daddr));
     898           0 :                 return;
     899             :         }
     900             : 
     901             :         /* No redirected packets during ip_rt_redirect_silence;
     902             :          * reset the algorithm.
     903             :          */
     904           0 :         if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) {
     905           0 :                 peer->rate_tokens = 0;
     906           0 :                 peer->n_redirects = 0;
     907             :         }
     908             : 
      909             :         /* Too many ignored redirects; do not send anything;
     910             :          * set dst.rate_last to the last seen redirected packet.
     911             :          */
     912           0 :         if (peer->n_redirects >= ip_rt_redirect_number) {
     913           0 :                 peer->rate_last = jiffies;
     914           0 :                 goto out_put_peer;
     915             :         }
     916             : 
     917             :         /* Check for load limit; set rate_last to the latest sent
     918             :          * redirect.
     919             :          */
     920           0 :         if (peer->n_redirects == 0 ||
     921           0 :             time_after(jiffies,
     922             :                        (peer->rate_last +
     923             :                         (ip_rt_redirect_load << peer->n_redirects)))) {
     924           0 :                 __be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);
     925             : 
     926           0 :                 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
     927           0 :                 peer->rate_last = jiffies;
     928           0 :                 ++peer->n_redirects;
     929             : #ifdef CONFIG_IP_ROUTE_VERBOSE
     930             :                 if (log_martians &&
     931             :                     peer->n_redirects == ip_rt_redirect_number)
     932             :                         net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
     933             :                                              &ip_hdr(skb)->saddr, inet_iif(skb),
     934             :                                              &ip_hdr(skb)->daddr, &gw);
     935             : #endif
     936             :         }
     937           0 : out_put_peer:
     938           0 :         inet_putpeer(peer);
     939             : }
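
/* A minimal sketch (not kernel code, helper name invented) of the
 * exponential-backoff test applied above: the n-th redirect is sent only
 * once ip_rt_redirect_load << n jiffies have elapsed since the previous
 * one, and after ip_rt_redirect_number redirects we stop entirely, as
 * described in the comment before ip_rt_send_redirect().
 */
static bool example_may_send_redirect(unsigned long now, unsigned long rate_last,
				      u32 n_redirects)
{
	if (n_redirects >= ip_rt_redirect_number)	/* host keeps ignoring us: give up */
		return false;
	return n_redirects == 0 ||
	       time_after(now, rate_last +
			       ((unsigned long)ip_rt_redirect_load << n_redirects));
}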
     940             : 
     941           0 : static int ip_error(struct sk_buff *skb)
     942             : {
     943           0 :         struct rtable *rt = skb_rtable(skb);
     944           0 :         struct net_device *dev = skb->dev;
     945           0 :         struct in_device *in_dev;
     946           0 :         struct inet_peer *peer;
     947           0 :         unsigned long now;
     948           0 :         struct net *net;
     949           0 :         bool send;
     950           0 :         int code;
     951             : 
     952           0 :         if (netif_is_l3_master(skb->dev)) {
     953           0 :                 dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
     954           0 :                 if (!dev)
     955           0 :                         goto out;
     956             :         }
     957             : 
     958           0 :         in_dev = __in_dev_get_rcu(dev);
     959             : 
     960             :         /* IP on this device is disabled. */
     961           0 :         if (!in_dev)
     962           0 :                 goto out;
     963             : 
     964           0 :         net = dev_net(rt->dst.dev);
     965           0 :         if (!IN_DEV_FORWARD(in_dev)) {
     966           0 :                 switch (rt->dst.error) {
     967             :                 case EHOSTUNREACH:
     968           0 :                         __IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS);
     969             :                         break;
     970             : 
     971             :                 case ENETUNREACH:
     972           0 :                         __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
     973             :                         break;
     974             :                 }
     975           0 :                 goto out;
     976             :         }
     977             : 
     978           0 :         switch (rt->dst.error) {
     979           0 :         case EINVAL:
     980             :         default:
     981           0 :                 goto out;
     982             :         case EHOSTUNREACH:
     983             :                 code = ICMP_HOST_UNREACH;
     984             :                 break;
     985           0 :         case ENETUNREACH:
     986           0 :                 code = ICMP_NET_UNREACH;
     987           0 :                 __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
     988             :                 break;
     989           0 :         case EACCES:
     990           0 :                 code = ICMP_PKT_FILTERED;
     991           0 :                 break;
     992             :         }
     993             : 
     994           0 :         peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
     995             :                                l3mdev_master_ifindex(skb->dev), 1);
     996             : 
     997           0 :         send = true;
     998           0 :         if (peer) {
     999           0 :                 now = jiffies;
    1000           0 :                 peer->rate_tokens += now - peer->rate_last;
    1001           0 :                 if (peer->rate_tokens > ip_rt_error_burst)
    1002           0 :                         peer->rate_tokens = ip_rt_error_burst;
    1003           0 :                 peer->rate_last = now;
    1004           0 :                 if (peer->rate_tokens >= ip_rt_error_cost)
    1005           0 :                         peer->rate_tokens -= ip_rt_error_cost;
    1006             :                 else
    1007             :                         send = false;
    1008           0 :                 inet_putpeer(peer);
    1009             :         }
    1010           0 :         if (send)
    1011           0 :                 icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
    1012             : 
    1013           0 : out:    kfree_skb(skb);
    1014           0 :         return 0;
    1015             : }
    1016             : 
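                      : /* Record a new path MTU for this route: locked or larger MTUs are
                      :  * ignored, values below ip_rt_min_pmtu are clamped (and the entry is
                      :  * locked instead), and the result is stored as a next-hop exception
                      :  * that expires after ip_rt_mtu_expires.
                      :  */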
    1017           0 : static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
    1018             : {
    1019           0 :         struct dst_entry *dst = &rt->dst;
    1020           0 :         struct net *net = dev_net(dst->dev);
    1021           0 :         struct fib_result res;
    1022           0 :         bool lock = false;
    1023           0 :         u32 old_mtu;
    1024             : 
    1025           0 :         if (ip_mtu_locked(dst))
    1026           0 :                 return;
    1027             : 
    1028           0 :         old_mtu = ipv4_mtu(dst);
    1029           0 :         if (old_mtu < mtu)
    1030             :                 return;
    1031             : 
    1032           0 :         if (mtu < ip_rt_min_pmtu) {
    1033           0 :                 lock = true;
    1034           0 :                 mtu = min(old_mtu, ip_rt_min_pmtu);
    1035             :         }
    1036             : 
    1037           0 :         if (rt->rt_pmtu == mtu && !lock &&
    1038           0 :             time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
    1039             :                 return;
    1040             : 
    1041           0 :         rcu_read_lock();
    1042           0 :         if (fib_lookup(net, fl4, &res, 0) == 0) {
    1043           0 :                 struct fib_nh_common *nhc;
    1044             : 
    1045           0 :                 fib_select_path(net, &res, fl4, NULL);
    1046           0 :                 nhc = FIB_RES_NHC(res);
    1047           0 :                 update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
    1048             :                                       jiffies + ip_rt_mtu_expires);
    1049             :         }
    1050           0 :         rcu_read_unlock();
    1051             : }
    1052             : 
    1053           0 : static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
    1054             :                               struct sk_buff *skb, u32 mtu,
    1055             :                               bool confirm_neigh)
    1056             : {
    1057           0 :         struct rtable *rt = (struct rtable *) dst;
    1058           0 :         struct flowi4 fl4;
    1059             : 
    1060           0 :         ip_rt_build_flow_key(&fl4, sk, skb);
    1061             : 
    1062             :         /* Don't make lookup fail for bridged encapsulations */
    1063           0 :         if (skb && netif_is_any_bridge_port(skb->dev))
    1064           0 :                 fl4.flowi4_oif = 0;
    1065             : 
    1066           0 :         __ip_rt_update_pmtu(rt, &fl4, mtu);
    1067           0 : }
    1068             : 
    1069           0 : void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
    1070             :                       int oif, u8 protocol)
    1071             : {
    1072           0 :         const struct iphdr *iph = (const struct iphdr *)skb->data;
    1073           0 :         struct flowi4 fl4;
    1074           0 :         struct rtable *rt;
    1075           0 :         u32 mark = IP4_REPLY_MARK(net, skb->mark);
    1076             : 
    1077           0 :         __build_flow_key(net, &fl4, NULL, iph, oif,
    1078           0 :                          RT_TOS(iph->tos), protocol, mark, 0);
    1079           0 :         rt = __ip_route_output_key(net, &fl4);
    1080           0 :         if (!IS_ERR(rt)) {
    1081           0 :                 __ip_rt_update_pmtu(rt, &fl4, mtu);
    1082           0 :                 ip_rt_put(rt);
    1083             :         }
    1084           0 : }
    1085             : EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
    1086             : 
    1087           0 : static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
    1088             : {
    1089           0 :         const struct iphdr *iph = (const struct iphdr *)skb->data;
    1090           0 :         struct flowi4 fl4;
    1091           0 :         struct rtable *rt;
    1092             : 
    1093           0 :         __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0);
    1094             : 
    1095           0 :         if (!fl4.flowi4_mark)
    1096           0 :                 fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
    1097             : 
    1098           0 :         rt = __ip_route_output_key(sock_net(sk), &fl4);
    1099           0 :         if (!IS_ERR(rt)) {
    1100           0 :                 __ip_rt_update_pmtu(rt, &fl4, mtu);
    1101           0 :                 ip_rt_put(rt);
    1102             :         }
    1103           0 : }
    1104             : 
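                      : /* Socket variant of the PMTU update: when the socket is owned by user
                      :  * context or has no cached dst, fall back to __ipv4_sk_update_pmtu();
                      :  * otherwise update the cached route in place and install a fresh one
                      :  * if the old dst has been invalidated.
                      :  */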
    1105           0 : void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
    1106             : {
    1107           0 :         const struct iphdr *iph = (const struct iphdr *)skb->data;
    1108           0 :         struct flowi4 fl4;
    1109           0 :         struct rtable *rt;
    1110           0 :         struct dst_entry *odst = NULL;
    1111           0 :         bool new = false;
    1112           0 :         struct net *net = sock_net(sk);
    1113             : 
    1114           0 :         bh_lock_sock(sk);
    1115             : 
    1116           0 :         if (!ip_sk_accept_pmtu(sk))
    1117           0 :                 goto out;
    1118             : 
    1119           0 :         odst = sk_dst_get(sk);
    1120             : 
    1121           0 :         if (sock_owned_by_user(sk) || !odst) {
    1122           0 :                 __ipv4_sk_update_pmtu(skb, sk, mtu);
    1123           0 :                 goto out;
    1124             :         }
    1125             : 
    1126           0 :         __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
    1127             : 
    1128           0 :         rt = (struct rtable *)odst;
    1129           0 :         if (odst->obsolete && !odst->ops->check(odst, 0)) {
    1130           0 :                 rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
    1131           0 :                 if (IS_ERR(rt))
    1132           0 :                         goto out;
    1133             : 
    1134             :                 new = true;
    1135             :         }
    1136             : 
    1137           0 :         __ip_rt_update_pmtu((struct rtable *)xfrm_dst_path(&rt->dst), &fl4, mtu);
    1138             : 
    1139           0 :         if (!dst_check(&rt->dst, 0)) {
    1140           0 :                 if (new)
    1141           0 :                         dst_release(&rt->dst);
    1142             : 
    1143           0 :                 rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
    1144           0 :                 if (IS_ERR(rt))
    1145           0 :                         goto out;
    1146             : 
    1147             :                 new = true;
    1148             :         }
    1149             : 
    1150           0 :         if (new)
    1151           0 :                 sk_dst_set(sk, &rt->dst);
    1152             : 
    1153           0 : out:
    1154           0 :         bh_unlock_sock(sk);
    1155           0 :         dst_release(odst);
    1156           0 : }
    1157             : EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
    1158             : 
    1159           0 : void ipv4_redirect(struct sk_buff *skb, struct net *net,
    1160             :                    int oif, u8 protocol)
    1161             : {
    1162           0 :         const struct iphdr *iph = (const struct iphdr *)skb->data;
    1163           0 :         struct flowi4 fl4;
    1164           0 :         struct rtable *rt;
    1165             : 
    1166           0 :         __build_flow_key(net, &fl4, NULL, iph, oif,
    1167           0 :                          RT_TOS(iph->tos), protocol, 0, 0);
    1168           0 :         rt = __ip_route_output_key(net, &fl4);
    1169           0 :         if (!IS_ERR(rt)) {
    1170           0 :                 __ip_do_redirect(rt, skb, &fl4, false);
    1171           0 :                 ip_rt_put(rt);
    1172             :         }
    1173           0 : }
    1174             : EXPORT_SYMBOL_GPL(ipv4_redirect);
    1175             : 
    1176           0 : void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
    1177             : {
    1178           0 :         const struct iphdr *iph = (const struct iphdr *)skb->data;
    1179           0 :         struct flowi4 fl4;
    1180           0 :         struct rtable *rt;
    1181           0 :         struct net *net = sock_net(sk);
    1182             : 
    1183           0 :         __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
    1184           0 :         rt = __ip_route_output_key(net, &fl4);
    1185           0 :         if (!IS_ERR(rt)) {
    1186           0 :                 __ip_do_redirect(rt, skb, &fl4, false);
    1187           0 :                 ip_rt_put(rt);
    1188             :         }
    1189           0 : }
    1190             : EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
    1191             : 
    1192        1246 : INDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst,
    1193             :                                                          u32 cookie)
    1194             : {
    1195        1246 :         struct rtable *rt = (struct rtable *) dst;
    1196             : 
    1197             :         /* All IPV4 dsts are created with ->obsolete set to the value
     1198             :          * DST_OBSOLETE_FORCE_CHK, which always forces validation calls
     1199             :          * down into this function.
    1200             :          *
    1201             :          * When a PMTU/redirect information update invalidates a route,
    1202             :          * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
    1203             :          * DST_OBSOLETE_DEAD.
    1204             :          */
    1205        2492 :         if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
    1206           0 :                 return NULL;
    1207             :         return dst;
    1208             : }
    1209             : EXPORT_INDIRECT_CALLABLE(ipv4_dst_check);
    1210             : 
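                      : /* Used by ipv4_link_failure() below: rebuild the IP options from the
                      :  * packet and send an ICMP_DEST_UNREACH/ICMP_HOST_UNREACH.
                      :  */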
    1211           0 : static void ipv4_send_dest_unreach(struct sk_buff *skb)
    1212             : {
    1213           0 :         struct ip_options opt;
    1214           0 :         int res;
    1215             : 
     1216             :         /* Recompile the IP options, since the IPCB may no longer be valid.
     1217             :          * Also check that we have a reasonable IPv4 header.
    1218             :          */
    1219           0 :         if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) ||
    1220           0 :             ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5)
    1221           0 :                 return;
    1222             : 
    1223           0 :         memset(&opt, 0, sizeof(opt));
    1224           0 :         if (ip_hdr(skb)->ihl > 5) {
    1225           0 :                 if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4))
    1226             :                         return;
    1227           0 :                 opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);
    1228             : 
    1229           0 :                 rcu_read_lock();
    1230           0 :                 res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL);
    1231           0 :                 rcu_read_unlock();
    1232             : 
    1233           0 :                 if (res)
    1234             :                         return;
    1235             :         }
    1236           0 :         __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt);
    1237             : }
    1238             : 
    1239           0 : static void ipv4_link_failure(struct sk_buff *skb)
    1240             : {
    1241           0 :         struct rtable *rt;
    1242             : 
    1243           0 :         ipv4_send_dest_unreach(skb);
    1244             : 
    1245           0 :         rt = skb_rtable(skb);
    1246           0 :         if (rt)
    1247           0 :                 dst_set_expires(&rt->dst, 0);
    1248           0 : }
    1249             : 
    1250           0 : static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
    1251             : {
    1252           0 :         pr_debug("%s: %pI4 -> %pI4, %s\n",
    1253             :                  __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
    1254             :                  skb->dev ? skb->dev->name : "?");
    1255           0 :         kfree_skb(skb);
    1256           0 :         WARN_ON(1);
    1257           0 :         return 0;
    1258             : }
    1259             : 
    1260             : /*
     1261             :    We do not cache the source address of the outgoing interface,
     1262             :    because it is used only by the IP RR, TS and SRR options,
     1263             :    so it is kept out of the fast path.
     1264             : 
     1265             :    BTW remember: "addr" is not required to be aligned
    1266             :    in IP options!
    1267             :  */
    1268             : 
    1269           0 : void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
    1270             : {
    1271           0 :         __be32 src;
    1272             : 
    1273           0 :         if (rt_is_output_route(rt))
    1274           0 :                 src = ip_hdr(skb)->saddr;
    1275             :         else {
    1276           0 :                 struct fib_result res;
    1277           0 :                 struct iphdr *iph = ip_hdr(skb);
    1278           0 :                 struct flowi4 fl4 = {
    1279           0 :                         .daddr = iph->daddr,
    1280           0 :                         .saddr = iph->saddr,
    1281           0 :                         .flowi4_tos = RT_TOS(iph->tos),
    1282           0 :                         .flowi4_oif = rt->dst.dev->ifindex,
    1283           0 :                         .flowi4_iif = skb->dev->ifindex,
    1284           0 :                         .flowi4_mark = skb->mark,
    1285             :                 };
    1286             : 
    1287           0 :                 rcu_read_lock();
    1288           0 :                 if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
    1289           0 :                         src = fib_result_prefsrc(dev_net(rt->dst.dev), &res);
    1290             :                 else
    1291           0 :                         src = inet_select_addr(rt->dst.dev,
    1292             :                                                rt_nexthop(rt, iph->daddr),
    1293             :                                                RT_SCOPE_UNIVERSE);
    1294           0 :                 rcu_read_unlock();
    1295             :         }
    1296           0 :         memcpy(addr, &src, 4);
    1297           0 : }
    1298             : 
    1299             : #ifdef CONFIG_IP_ROUTE_CLASSID
    1300             : static void set_class_tag(struct rtable *rt, u32 tag)
    1301             : {
    1302             :         if (!(rt->dst.tclassid & 0xFFFF))
    1303             :                 rt->dst.tclassid |= tag & 0xFFFF;
    1304             :         if (!(rt->dst.tclassid & 0xFFFF0000))
    1305             :                 rt->dst.tclassid |= tag & 0xFFFF0000;
    1306             : }
    1307             : #endif
    1308             : 
    1309          12 : static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
    1310             : {
    1311          12 :         unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
    1312          12 :         unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
    1313             :                                     ip_rt_min_advmss);
    1314             : 
    1315          12 :         return min(advmss, IPV4_MAX_PMTU - header_size);
    1316             : }
    1317             : 
    1318         980 : INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst)
    1319             : {
    1320         980 :         const struct rtable *rt = (const struct rtable *)dst;
    1321         980 :         unsigned int mtu = rt->rt_pmtu;
    1322             : 
    1323         980 :         if (!mtu || time_after_eq(jiffies, rt->dst.expires))
    1324         980 :                 mtu = dst_metric_raw(dst, RTAX_MTU);
    1325             : 
    1326         980 :         if (mtu)
    1327             :                 return mtu;
    1328             : 
    1329         980 :         mtu = READ_ONCE(dst->dev->mtu);
    1330             : 
    1331         980 :         if (unlikely(ip_mtu_locked(dst))) {
    1332           0 :                 if (rt->rt_uses_gateway && mtu > 576)
    1333             :                         mtu = 576;
    1334             :         }
    1335             : 
    1336         980 :         mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
    1337             : 
    1338         980 :         return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
    1339             : }
    1340             : EXPORT_INDIRECT_CALLABLE(ipv4_mtu);
    1341             : 
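                      : /* Remove the next-hop exception for @daddr from the per-nexthop hash
                      :  * under fnhe_lock, flushing any routes bound to it before it is freed
                      :  * via RCU.
                      :  */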
    1342           0 : static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr)
    1343             : {
    1344           0 :         struct fnhe_hash_bucket *hash;
    1345           0 :         struct fib_nh_exception *fnhe, __rcu **fnhe_p;
    1346           0 :         u32 hval = fnhe_hashfun(daddr);
    1347             : 
    1348           0 :         spin_lock_bh(&fnhe_lock);
    1349             : 
    1350           0 :         hash = rcu_dereference_protected(nhc->nhc_exceptions,
    1351             :                                          lockdep_is_held(&fnhe_lock));
    1352           0 :         hash += hval;
    1353             : 
    1354           0 :         fnhe_p = &hash->chain;
    1355           0 :         fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
    1356           0 :         while (fnhe) {
    1357           0 :                 if (fnhe->fnhe_daddr == daddr) {
    1358           0 :                         rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
    1359             :                                 fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
    1360             :                         /* set fnhe_daddr to 0 to ensure it won't bind with
    1361             :                          * new dsts in rt_bind_exception().
    1362             :                          */
    1363           0 :                         fnhe->fnhe_daddr = 0;
    1364           0 :                         fnhe_flush_routes(fnhe);
    1365           0 :                         kfree_rcu(fnhe, rcu);
    1366             :                         break;
    1367             :                 }
    1368           0 :                 fnhe_p = &fnhe->fnhe_next;
    1369           0 :                 fnhe = rcu_dereference_protected(fnhe->fnhe_next,
    1370             :                                                  lockdep_is_held(&fnhe_lock));
    1371             :         }
    1372             : 
    1373           0 :         spin_unlock_bh(&fnhe_lock);
    1374           0 : }
    1375             : 
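                      : /* Look up the next-hop exception for @daddr, if any. Expired entries
                      :  * are removed on the fly (ip_del_fnhe()) and treated as a miss.
                      :  */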
    1376          32 : static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc,
    1377             :                                                __be32 daddr)
    1378             : {
    1379          32 :         struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions);
    1380          32 :         struct fib_nh_exception *fnhe;
    1381          32 :         u32 hval;
    1382             : 
    1383          32 :         if (!hash)
    1384             :                 return NULL;
    1385             : 
    1386           0 :         hval = fnhe_hashfun(daddr);
    1387             : 
    1388           0 :         for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
    1389           0 :              fnhe = rcu_dereference(fnhe->fnhe_next)) {
    1390           0 :                 if (fnhe->fnhe_daddr == daddr) {
    1391           0 :                         if (fnhe->fnhe_expires &&
    1392           0 :                             time_after(jiffies, fnhe->fnhe_expires)) {
    1393           0 :                                 ip_del_fnhe(nhc, daddr);
    1394           0 :                                 break;
    1395             :                         }
    1396             :                         return fnhe;
    1397             :                 }
    1398             :         }
    1399             :         return NULL;
    1400             : }
    1401             : 
    1402             : /* MTU selection:
    1403             :  * 1. mtu on route is locked - use it
    1404             :  * 2. mtu from nexthop exception
    1405             :  * 3. mtu from egress device
    1406             :  */
    1407             : 
    1408           0 : u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
    1409             : {
    1410           0 :         struct fib_nh_common *nhc = res->nhc;
    1411           0 :         struct net_device *dev = nhc->nhc_dev;
    1412           0 :         struct fib_info *fi = res->fi;
    1413           0 :         u32 mtu = 0;
    1414             : 
    1415           0 :         if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
    1416           0 :             fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
    1417           0 :                 mtu = fi->fib_mtu;
    1418             : 
    1419           0 :         if (likely(!mtu)) {
    1420           0 :                 struct fib_nh_exception *fnhe;
    1421             : 
    1422           0 :                 fnhe = find_exception(nhc, daddr);
    1423           0 :                 if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires))
    1424           0 :                         mtu = fnhe->fnhe_pmtu;
    1425             :         }
    1426             : 
    1427           0 :         if (likely(!mtu))
    1428           0 :                 mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU);
    1429             : 
    1430           0 :         return mtu - lwtunnel_headroom(nhc->nhc_lwtstate, mtu);
    1431             : }
    1432             : 
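                      : /* Bind @rt to a matching next-hop exception under fnhe_lock: entries
                      :  * with a stale genid are reset first, the route inherits the fnhe's
                      :  * PMTU/gateway state, and, when @do_cache is set, the route is cached
                      :  * in the exception. Returns true only if the route was cached.
                      :  */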
    1433           0 : static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
    1434             :                               __be32 daddr, const bool do_cache)
    1435             : {
    1436           0 :         bool ret = false;
    1437             : 
    1438           0 :         spin_lock_bh(&fnhe_lock);
    1439             : 
    1440           0 :         if (daddr == fnhe->fnhe_daddr) {
    1441           0 :                 struct rtable __rcu **porig;
    1442           0 :                 struct rtable *orig;
    1443           0 :                 int genid = fnhe_genid(dev_net(rt->dst.dev));
    1444             : 
    1445           0 :                 if (rt_is_input_route(rt))
    1446           0 :                         porig = &fnhe->fnhe_rth_input;
    1447             :                 else
    1448           0 :                         porig = &fnhe->fnhe_rth_output;
    1449           0 :                 orig = rcu_dereference(*porig);
    1450             : 
    1451           0 :                 if (fnhe->fnhe_genid != genid) {
    1452           0 :                         fnhe->fnhe_genid = genid;
    1453           0 :                         fnhe->fnhe_gw = 0;
    1454           0 :                         fnhe->fnhe_pmtu = 0;
    1455           0 :                         fnhe->fnhe_expires = 0;
    1456           0 :                         fnhe->fnhe_mtu_locked = false;
    1457           0 :                         fnhe_flush_routes(fnhe);
    1458           0 :                         orig = NULL;
    1459             :                 }
    1460           0 :                 fill_route_from_fnhe(rt, fnhe);
    1461           0 :                 if (!rt->rt_gw4) {
    1462           0 :                         rt->rt_gw4 = daddr;
    1463           0 :                         rt->rt_gw_family = AF_INET;
    1464             :                 }
    1465             : 
    1466           0 :                 if (do_cache) {
    1467           0 :                         dst_hold(&rt->dst);
    1468           0 :                         rcu_assign_pointer(*porig, rt);
    1469           0 :                         if (orig) {
    1470           0 :                                 dst_dev_put(&orig->dst);
    1471           0 :                                 dst_release(&orig->dst);
    1472             :                         }
    1473             :                         ret = true;
    1474             :                 }
    1475             : 
    1476           0 :                 fnhe->fnhe_stamp = jiffies;
    1477             :         }
    1478           0 :         spin_unlock_bh(&fnhe_lock);
    1479             : 
    1480           0 :         return ret;
    1481             : }
    1482             : 
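                      : /* Cache @rt in the nexthop: the shared input slot for input routes,
                      :  * the per-CPU output slot otherwise. cmpxchg() resolves races with
                      :  * concurrent writers; if we lose, the extra reference is dropped and
                      :  * false is returned.
                      :  */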
    1483           8 : static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt)
    1484             : {
    1485           8 :         struct rtable *orig, *prev, **p;
    1486           8 :         bool ret = true;
    1487             : 
    1488           8 :         if (rt_is_input_route(rt)) {
    1489           1 :                 p = (struct rtable **)&nhc->nhc_rth_input;
    1490             :         } else {
    1491           7 :                 p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
    1492             :         }
    1493           8 :         orig = *p;
    1494             : 
    1495             :         /* hold dst before doing cmpxchg() to avoid race condition
    1496             :          * on this dst
    1497             :          */
    1498           8 :         dst_hold(&rt->dst);
    1499           8 :         prev = cmpxchg(p, orig, rt);
    1500           8 :         if (prev == orig) {
    1501           8 :                 if (orig) {
    1502           0 :                         rt_add_uncached_list(orig);
    1503           0 :                         dst_release(&orig->dst);
    1504             :                 }
    1505             :         } else {
    1506           0 :                 dst_release(&rt->dst);
    1507           0 :                 ret = false;
    1508             :         }
    1509             : 
    1510           8 :         return ret;
    1511             : }
    1512             : 
    1513             : struct uncached_list {
    1514             :         spinlock_t              lock;
    1515             :         struct list_head        head;
    1516             : };
    1517             : 
    1518             : static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
    1519             : 
    1520           6 : void rt_add_uncached_list(struct rtable *rt)
    1521             : {
    1522           6 :         struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);
    1523             : 
    1524           6 :         rt->rt_uncached_list = ul;
    1525             : 
    1526           6 :         spin_lock_bh(&ul->lock);
    1527           6 :         list_add_tail(&rt->rt_uncached, &ul->head);
    1528           6 :         spin_unlock_bh(&ul->lock);
    1529           6 : }
    1530             : 
    1531           8 : void rt_del_uncached_list(struct rtable *rt)
    1532             : {
    1533           8 :         if (!list_empty(&rt->rt_uncached)) {
    1534           6 :                 struct uncached_list *ul = rt->rt_uncached_list;
    1535             : 
    1536           6 :                 spin_lock_bh(&ul->lock);
    1537           6 :                 list_del(&rt->rt_uncached);
    1538           6 :                 spin_unlock_bh(&ul->lock);
    1539             :         }
    1540           8 : }
    1541             : 
    1542           8 : static void ipv4_dst_destroy(struct dst_entry *dst)
    1543             : {
    1544           8 :         struct rtable *rt = (struct rtable *)dst;
    1545             : 
    1546           8 :         ip_dst_metrics_put(dst);
    1547           8 :         rt_del_uncached_list(rt);
    1548           8 : }
    1549             : 
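                      : /* Detach uncached routes from @dev so the device can be unregistered:
                      :  * every rtable on the per-CPU uncached lists that still points at @dev
                      :  * is switched over to blackhole_netdev, transferring the device
                      :  * reference.
                      :  */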
    1550           0 : void rt_flush_dev(struct net_device *dev)
    1551             : {
    1552           0 :         struct rtable *rt;
    1553           0 :         int cpu;
    1554             : 
    1555           0 :         for_each_possible_cpu(cpu) {
    1556           0 :                 struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
    1557             : 
    1558           0 :                 spin_lock_bh(&ul->lock);
    1559           0 :                 list_for_each_entry(rt, &ul->head, rt_uncached) {
    1560           0 :                         if (rt->dst.dev != dev)
    1561           0 :                                 continue;
    1562           0 :                         rt->dst.dev = blackhole_netdev;
    1563           0 :                         dev_hold(rt->dst.dev);
    1564           0 :                         dev_put(dev);
    1565             :                 }
    1566           0 :                 spin_unlock_bh(&ul->lock);
    1567             :         }
    1568           0 : }
    1569             : 
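                      : /* A cached route is reusable only while it is still marked
                      :  * DST_OBSOLETE_FORCE_CHK and has not expired (rt_is_expired()).
                      :  */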
    1570          54 : static bool rt_cache_valid(const struct rtable *rt)
    1571             : {
    1572         100 :         return  rt &&
    1573          54 :                 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
    1574          46 :                 !rt_is_expired(rt);
    1575             : }
    1576             : 
    1577          13 : static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
    1578             :                            const struct fib_result *res,
    1579             :                            struct fib_nh_exception *fnhe,
    1580             :                            struct fib_info *fi, u16 type, u32 itag,
    1581             :                            const bool do_cache)
    1582             : {
    1583          13 :         bool cached = false;
    1584             : 
    1585          13 :         if (fi) {
    1586           7 :                 struct fib_nh_common *nhc = FIB_RES_NHC(*res);
    1587             : 
    1588           7 :                 if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) {
    1589           0 :                         rt->rt_uses_gateway = 1;
    1590           0 :                         rt->rt_gw_family = nhc->nhc_gw_family;
    1591             :                         /* only INET and INET6 are supported */
    1592           0 :                         if (likely(nhc->nhc_gw_family == AF_INET))
    1593           0 :                                 rt->rt_gw4 = nhc->nhc_gw.ipv4;
    1594             :                         else
    1595           0 :                                 rt->rt_gw6 = nhc->nhc_gw.ipv6;
    1596             :                 }
    1597             : 
    1598           7 :                 ip_dst_init_metrics(&rt->dst, fi->fib_metrics);
    1599             : 
    1600             : #ifdef CONFIG_IP_ROUTE_CLASSID
    1601             :                 if (nhc->nhc_family == AF_INET) {
    1602             :                         struct fib_nh *nh;
    1603             : 
    1604             :                         nh = container_of(nhc, struct fib_nh, nh_common);
    1605             :                         rt->dst.tclassid = nh->nh_tclassid;
    1606             :                 }
    1607             : #endif
    1608           7 :                 rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
    1609           7 :                 if (unlikely(fnhe))
    1610           0 :                         cached = rt_bind_exception(rt, fnhe, daddr, do_cache);
    1611           7 :                 else if (do_cache)
    1612           7 :                         cached = rt_cache_route(nhc, rt);
    1613           7 :                 if (unlikely(!cached)) {
    1614             :                         /* Routes we intend to cache in nexthop exception or
    1615             :                          * FIB nexthop have the DST_NOCACHE bit clear.
    1616             :                          * However, if we are unsuccessful at storing this
    1617             :                          * route into the cache we really need to set it.
    1618             :                          */
    1619           0 :                         if (!rt->rt_gw4) {
    1620           0 :                                 rt->rt_gw_family = AF_INET;
    1621           0 :                                 rt->rt_gw4 = daddr;
    1622             :                         }
    1623           0 :                         rt_add_uncached_list(rt);
    1624             :                 }
    1625             :         } else
    1626           6 :                 rt_add_uncached_list(rt);
    1627             : 
    1628             : #ifdef CONFIG_IP_ROUTE_CLASSID
    1629             : #ifdef CONFIG_IP_MULTIPLE_TABLES
    1630             :         set_class_tag(rt, res->tclassid);
    1631             : #endif
    1632             :         set_class_tag(rt, itag);
    1633             : #endif
    1634          13 : }
    1635             : 
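                      : /* Allocate and initialise an rtable. Every IPv4 dst is created with
                      :  * DST_OBSOLETE_FORCE_CHK so that ipv4_dst_check() is always consulted
                      :  * later; local routes get ip_local_deliver() as their input handler.
                      :  */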
    1636          16 : struct rtable *rt_dst_alloc(struct net_device *dev,
    1637             :                             unsigned int flags, u16 type,
    1638             :                             bool nopolicy, bool noxfrm)
    1639             : {
    1640          16 :         struct rtable *rt;
    1641             : 
    1642          48 :         rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
    1643          30 :                        (nopolicy ? DST_NOPOLICY : 0) |
    1644             :                        (noxfrm ? DST_NOXFRM : 0));
    1645             : 
    1646          16 :         if (rt) {
    1647          16 :                 rt->rt_genid = rt_genid_ipv4(dev_net(dev));
    1648          16 :                 rt->rt_flags = flags;
    1649          16 :                 rt->rt_type = type;
    1650          16 :                 rt->rt_is_input = 0;
    1651          16 :                 rt->rt_iif = 0;
    1652          16 :                 rt->rt_pmtu = 0;
    1653          16 :                 rt->rt_mtu_locked = 0;
    1654          16 :                 rt->rt_uses_gateway = 0;
    1655          16 :                 rt->rt_gw_family = 0;
    1656          16 :                 rt->rt_gw4 = 0;
    1657          16 :                 INIT_LIST_HEAD(&rt->rt_uncached);
    1658             : 
    1659          16 :                 rt->dst.output = ip_output;
    1660          16 :                 if (flags & RTCF_LOCAL)
    1661          12 :                         rt->dst.input = ip_local_deliver;
    1662             :         }
    1663             : 
    1664          16 :         return rt;
    1665             : }
    1666             : EXPORT_SYMBOL(rt_dst_alloc);
    1667             : 
    1668           0 : struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
    1669             : {
    1670           0 :         struct rtable *new_rt;
    1671             : 
    1672           0 :         new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
    1673           0 :                            rt->dst.flags);
    1674             : 
    1675           0 :         if (new_rt) {
    1676           0 :                 new_rt->rt_genid = rt_genid_ipv4(dev_net(dev));
    1677           0 :                 new_rt->rt_flags = rt->rt_flags;
    1678           0 :                 new_rt->rt_type = rt->rt_type;
    1679           0 :                 new_rt->rt_is_input = rt->rt_is_input;
    1680           0 :                 new_rt->rt_iif = rt->rt_iif;
    1681           0 :                 new_rt->rt_pmtu = rt->rt_pmtu;
    1682           0 :                 new_rt->rt_mtu_locked = rt->rt_mtu_locked;
    1683           0 :                 new_rt->rt_gw_family = rt->rt_gw_family;
    1684           0 :                 if (rt->rt_gw_family == AF_INET)
    1685           0 :                         new_rt->rt_gw4 = rt->rt_gw4;
    1686           0 :                 else if (rt->rt_gw_family == AF_INET6)
    1687           0 :                         new_rt->rt_gw6 = rt->rt_gw6;
    1688           0 :                 INIT_LIST_HEAD(&new_rt->rt_uncached);
    1689             : 
    1690           0 :                 new_rt->dst.input = rt->dst.input;
    1691           0 :                 new_rt->dst.output = rt->dst.output;
    1692           0 :                 new_rt->dst.error = rt->dst.error;
    1693           0 :                 new_rt->dst.lastuse = jiffies;
    1694           0 :                 new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate);
    1695             :         }
    1696           0 :         return new_rt;
    1697             : }
    1698             : EXPORT_SYMBOL(rt_dst_clone);
    1699             : 
    1700             : /* called in rcu_read_lock() section */
    1701           0 : int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
    1702             :                           u8 tos, struct net_device *dev,
    1703             :                           struct in_device *in_dev, u32 *itag)
    1704             : {
    1705           0 :         int err;
    1706             : 
    1707             :         /* Primary sanity checks. */
    1708           0 :         if (!in_dev)
    1709             :                 return -EINVAL;
    1710             : 
    1711           0 :         if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
    1712           0 :             skb->protocol != htons(ETH_P_IP))
    1713             :                 return -EINVAL;
    1714             : 
    1715           0 :         if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
    1716             :                 return -EINVAL;
    1717             : 
    1718           0 :         if (ipv4_is_zeronet(saddr)) {
    1719           0 :                 if (!ipv4_is_local_multicast(daddr) &&
    1720           0 :                     ip_hdr(skb)->protocol != IPPROTO_IGMP)
    1721           0 :                         return -EINVAL;
    1722             :         } else {
    1723           0 :                 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
    1724             :                                           in_dev, itag);
    1725           0 :                 if (err < 0)
    1726             :                         return err;
    1727             :         }
    1728             :         return 0;
    1729             : }
    1730             : 
    1731             : /* called in rcu_read_lock() section */
    1732           0 : static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
    1733             :                              u8 tos, struct net_device *dev, int our)
    1734             : {
    1735           0 :         struct in_device *in_dev = __in_dev_get_rcu(dev);
    1736           0 :         unsigned int flags = RTCF_MULTICAST;
    1737           0 :         struct rtable *rth;
    1738           0 :         u32 itag = 0;
    1739           0 :         int err;
    1740             : 
    1741           0 :         err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
    1742           0 :         if (err)
    1743             :                 return err;
    1744             : 
    1745           0 :         if (our)
    1746           0 :                 flags |= RTCF_LOCAL;
    1747             : 
    1748           0 :         rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
    1749           0 :                            IN_DEV_ORCONF(in_dev, NOPOLICY), false);
    1750           0 :         if (!rth)
    1751             :                 return -ENOBUFS;
    1752             : 
    1753             : #ifdef CONFIG_IP_ROUTE_CLASSID
    1754             :         rth->dst.tclassid = itag;
    1755             : #endif
    1756           0 :         rth->dst.output = ip_rt_bug;
     1757           0 :         rth->rt_is_input = 1;
    1758             : 
    1759             : #ifdef CONFIG_IP_MROUTE
    1760             :         if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
    1761             :                 rth->dst.input = ip_mr_input;
    1762             : #endif
    1763           0 :         RT_CACHE_STAT_INC(in_slow_mc);
    1764             : 
    1765           0 :         skb_dst_set(skb, &rth->dst);
    1766           0 :         return 0;
    1767             : }
    1768             : 
    1769             : 
    1770           0 : static void ip_handle_martian_source(struct net_device *dev,
    1771             :                                      struct in_device *in_dev,
    1772             :                                      struct sk_buff *skb,
    1773             :                                      __be32 daddr,
    1774             :                                      __be32 saddr)
    1775             : {
    1776           0 :         RT_CACHE_STAT_INC(in_martian_src);
    1777             : #ifdef CONFIG_IP_ROUTE_VERBOSE
    1778             :         if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
    1779             :                 /*
     1780             :                  *      Per the RFC 1812 recommendation: if the source is
     1781             :                  *      martian, the only hint we have is the MAC header.
    1782             :                  */
    1783             :                 pr_warn("martian source %pI4 from %pI4, on dev %s\n",
    1784             :                         &daddr, &saddr, dev->name);
    1785             :                 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
    1786             :                         print_hex_dump(KERN_WARNING, "ll header: ",
    1787             :                                        DUMP_PREFIX_OFFSET, 16, 1,
    1788             :                                        skb_mac_header(skb),
    1789             :                                        dev->hard_header_len, false);
    1790             :                 }
    1791             :         }
    1792             : #endif
    1793             : }
    1794             : 
    1795             : /* called in rcu_read_lock() section */
    1796           0 : static int __mkroute_input(struct sk_buff *skb,
    1797             :                            const struct fib_result *res,
    1798             :                            struct in_device *in_dev,
    1799             :                            __be32 daddr, __be32 saddr, u32 tos)
    1800             : {
    1801           0 :         struct fib_nh_common *nhc = FIB_RES_NHC(*res);
    1802           0 :         struct net_device *dev = nhc->nhc_dev;
    1803           0 :         struct fib_nh_exception *fnhe;
    1804           0 :         struct rtable *rth;
    1805           0 :         int err;
    1806           0 :         struct in_device *out_dev;
    1807           0 :         bool do_cache;
    1808           0 :         u32 itag = 0;
    1809             : 
    1810             :         /* get a working reference to the output device */
    1811           0 :         out_dev = __in_dev_get_rcu(dev);
    1812           0 :         if (!out_dev) {
    1813           0 :                 net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
    1814           0 :                 return -EINVAL;
    1815             :         }
    1816             : 
    1817           0 :         err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
    1818             :                                   in_dev->dev, in_dev, &itag);
    1819           0 :         if (err < 0) {
    1820           0 :                 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
    1821             :                                          saddr);
    1822             : 
    1823           0 :                 goto cleanup;
    1824             :         }
    1825             : 
    1826           0 :         do_cache = res->fi && !itag;
    1827           0 :         if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
    1828           0 :             skb->protocol == htons(ETH_P_IP)) {
    1829           0 :                 __be32 gw;
    1830             : 
    1831           0 :                 gw = nhc->nhc_gw_family == AF_INET ? nhc->nhc_gw.ipv4 : 0;
    1832           0 :                 if (IN_DEV_SHARED_MEDIA(out_dev) ||
    1833           0 :                     inet_addr_onlink(out_dev, saddr, gw))
    1834           0 :                         IPCB(skb)->flags |= IPSKB_DOREDIRECT;
    1835             :         }
    1836             : 
    1837           0 :         if (skb->protocol != htons(ETH_P_IP)) {
     1838             :                 /* Not IP (i.e. ARP). Do not create a route if it is
     1839             :                  * invalid for proxy ARP. DNAT routes are always valid.
     1840             :                  *
     1841             :                  * The proxy ARP feature has been extended to allow ARP
     1842             :                  * replies back on the same interface, to support
     1843             :                  * private VLAN switch technologies. See arp.c.
    1844             :                  */
    1845           0 :                 if (out_dev == in_dev &&
    1846           0 :                     IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
    1847           0 :                         err = -EINVAL;
    1848           0 :                         goto cleanup;
    1849             :                 }
    1850             :         }
    1851             : 
    1852           0 :         fnhe = find_exception(nhc, daddr);
    1853           0 :         if (do_cache) {
    1854           0 :                 if (fnhe)
    1855           0 :                         rth = rcu_dereference(fnhe->fnhe_rth_input);
    1856             :                 else
    1857           0 :                         rth = rcu_dereference(nhc->nhc_rth_input);
    1858           0 :                 if (rt_cache_valid(rth)) {
    1859           0 :                         skb_dst_set_noref(skb, &rth->dst);
    1860           0 :                         goto out;
    1861             :                 }
    1862             :         }
    1863             : 
    1864           0 :         rth = rt_dst_alloc(out_dev->dev, 0, res->type,
    1865           0 :                            IN_DEV_ORCONF(in_dev, NOPOLICY),
    1866           0 :                            IN_DEV_ORCONF(out_dev, NOXFRM));
    1867           0 :         if (!rth) {
    1868           0 :                 err = -ENOBUFS;
    1869           0 :                 goto cleanup;
    1870             :         }
    1871             : 
    1872           0 :         rth->rt_is_input = 1;
    1873           0 :         RT_CACHE_STAT_INC(in_slow_tot);
    1874             : 
    1875           0 :         rth->dst.input = ip_forward;
    1876             : 
    1877           0 :         rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag,
    1878             :                        do_cache);
    1879           0 :         lwtunnel_set_redirect(&rth->dst);
    1880           0 :         skb_dst_set(skb, &rth->dst);
    1881             : out:
    1882             :         err = 0;
    1883             :  cleanup:
    1884             :         return err;
    1885             : }
    1886             : 
    1887             : #ifdef CONFIG_IP_ROUTE_MULTIPATH
    1888             : /* To make ICMP packets follow the right flow, the multipath hash is
    1889             :  * calculated from the inner IP addresses.
    1890             :  */
    1891             : static void ip_multipath_l3_keys(const struct sk_buff *skb,
    1892             :                                  struct flow_keys *hash_keys)
    1893             : {
    1894             :         const struct iphdr *outer_iph = ip_hdr(skb);
    1895             :         const struct iphdr *key_iph = outer_iph;
    1896             :         const struct iphdr *inner_iph;
    1897             :         const struct icmphdr *icmph;
    1898             :         struct iphdr _inner_iph;
    1899             :         struct icmphdr _icmph;
    1900             : 
    1901             :         if (likely(outer_iph->protocol != IPPROTO_ICMP))
    1902             :                 goto out;
    1903             : 
    1904             :         if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
    1905             :                 goto out;
    1906             : 
    1907             :         icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
    1908             :                                    &_icmph);
    1909             :         if (!icmph)
    1910             :                 goto out;
    1911             : 
    1912             :         if (!icmp_is_err(icmph->type))
    1913             :                 goto out;
    1914             : 
    1915             :         inner_iph = skb_header_pointer(skb,
    1916             :                                        outer_iph->ihl * 4 + sizeof(_icmph),
    1917             :                                        sizeof(_inner_iph), &_inner_iph);
    1918             :         if (!inner_iph)
    1919             :                 goto out;
    1920             : 
    1921             :         key_iph = inner_iph;
    1922             : out:
    1923             :         hash_keys->addrs.v4addrs.src = key_iph->saddr;
    1924             :         hash_keys->addrs.v4addrs.dst = key_iph->daddr;
    1925             : }
    1926             : 
    1927             : /* if skb is set it will be used and fl4 can be NULL */
    1928             : int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
    1929             :                        const struct sk_buff *skb, struct flow_keys *flkeys)
    1930             : {
    1931             :         u32 multipath_hash = fl4 ? fl4->flowi4_multipath_hash : 0;
    1932             :         struct flow_keys hash_keys;
    1933             :         u32 mhash;
    1934             : 
    1935             :         switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
    1936             :         case 0:
    1937             :                 memset(&hash_keys, 0, sizeof(hash_keys));
    1938             :                 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
    1939             :                 if (skb) {
    1940             :                         ip_multipath_l3_keys(skb, &hash_keys);
    1941             :                 } else {
    1942             :                         hash_keys.addrs.v4addrs.src = fl4->saddr;
    1943             :                         hash_keys.addrs.v4addrs.dst = fl4->daddr;
    1944             :                 }
    1945             :                 break;
    1946             :         case 1:
    1947             :                 /* skb is currently provided only when forwarding */
    1948             :                 if (skb) {
    1949             :                         unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
    1950             :                         struct flow_keys keys;
    1951             : 
    1952             :                         /* short-circuit if we already have L4 hash present */
    1953             :                         if (skb->l4_hash)
    1954             :                                 return skb_get_hash_raw(skb) >> 1;
    1955             : 
    1956             :                         memset(&hash_keys, 0, sizeof(hash_keys));
    1957             : 
    1958             :                         if (!flkeys) {
    1959             :                                 skb_flow_dissect_flow_keys(skb, &keys, flag);
    1960             :                                 flkeys = &keys;
    1961             :                         }
    1962             : 
    1963             :                         hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
    1964             :                         hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src;
    1965             :                         hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst;
    1966             :                         hash_keys.ports.src = flkeys->ports.src;
    1967             :                         hash_keys.ports.dst = flkeys->ports.dst;
    1968             :                         hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
    1969             :                 } else {
    1970             :                         memset(&hash_keys, 0, sizeof(hash_keys));
    1971             :                         hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
    1972             :                         hash_keys.addrs.v4addrs.src = fl4->saddr;
    1973             :                         hash_keys.addrs.v4addrs.dst = fl4->daddr;
    1974             :                         hash_keys.ports.src = fl4->fl4_sport;
    1975             :                         hash_keys.ports.dst = fl4->fl4_dport;
    1976             :                         hash_keys.basic.ip_proto = fl4->flowi4_proto;
    1977             :                 }
    1978             :                 break;
    1979             :         case 2:
    1980             :                 memset(&hash_keys, 0, sizeof(hash_keys));
    1981             :                 /* skb is currently provided only when forwarding */
    1982             :                 if (skb) {
    1983             :                         struct flow_keys keys;
    1984             : 
    1985             :                         skb_flow_dissect_flow_keys(skb, &keys, 0);
    1986             :                         /* Inner can be v4 or v6 */
    1987             :                         if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
    1988             :                                 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
    1989             :                                 hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
    1990             :                                 hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
    1991             :                         } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
    1992             :                                 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
    1993             :                                 hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
    1994             :                                 hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
    1995             :                                 hash_keys.tags.flow_label = keys.tags.flow_label;
    1996             :                                 hash_keys.basic.ip_proto = keys.basic.ip_proto;
    1997             :                         } else {
    1998             :                                 /* Same as case 0 */
    1999             :                                 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
    2000             :                                 ip_multipath_l3_keys(skb, &hash_keys);
    2001             :                         }
    2002             :                 } else {
    2003             :                         /* Same as case 0 */
    2004             :                         hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
    2005             :                         hash_keys.addrs.v4addrs.src = fl4->saddr;
    2006             :                         hash_keys.addrs.v4addrs.dst = fl4->daddr;
    2007             :                 }
    2008             :                 break;
    2009             :         }
    2010             :         mhash = flow_hash_from_keys(&hash_keys);
    2011             : 
    2012             :         if (multipath_hash)
    2013             :                 mhash = jhash_2words(mhash, multipath_hash, 0);
    2014             : 
    2015             :         return mhash >> 1;
    2016             : }
    2017             : #endif /* CONFIG_IP_ROUTE_MULTIPATH */
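For illustration, here is a minimal user-space sketch of the hash-threshold idea that the mhash value computed above feeds into when fib_select_multipath() picks a next hop. The struct, weights and scaling below are hypothetical stand-ins for illustration only, not the kernel's fib_nh data structures.

/* Hypothetical sketch: hash-threshold multipath selection.
 * Each next hop owns a slice of the 31-bit hash space proportional to its
 * weight; the first next hop whose upper bound covers the hash is chosen.
 */
#include <stdio.h>

struct demo_nh {
	const char *name;
	unsigned int weight;
	unsigned int upper_bound;	/* filled in by demo_set_bounds() */
};

static void demo_set_bounds(struct demo_nh *nh, int n)
{
	unsigned long long total = 0, acc = 0;
	int i;

	for (i = 0; i < n; i++)
		total += nh[i].weight;
	for (i = 0; i < n; i++) {
		acc += nh[i].weight;
		/* scale the cumulative weight into the 31-bit hash range */
		nh[i].upper_bound = (unsigned int)(acc * 0x7FFFFFFFULL / total);
	}
}

static const char *demo_select(const struct demo_nh *nh, int n, unsigned int mhash)
{
	int i;

	for (i = 0; i < n; i++)
		if (mhash <= nh[i].upper_bound)
			return nh[i].name;
	return nh[n - 1].name;
}

int main(void)
{
	struct demo_nh nh[] = { { "nh0", 1, 0 }, { "nh1", 3, 0 } };

	demo_set_bounds(nh, 2);
	/* 0x12345678 stands in for an mhash >> 1 value returned above */
	printf("selected %s\n", demo_select(nh, 2, 0x12345678u));
	return 0;
}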
    2018             : 
    2019           0 : static int ip_mkroute_input(struct sk_buff *skb,
    2020             :                             struct fib_result *res,
    2021             :                             struct in_device *in_dev,
    2022             :                             __be32 daddr, __be32 saddr, u32 tos,
    2023             :                             struct flow_keys *hkeys)
    2024             : {
    2025             : #ifdef CONFIG_IP_ROUTE_MULTIPATH
    2026             :         if (res->fi && fib_info_num_path(res->fi) > 1) {
    2027             :                 int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
    2028             : 
    2029             :                 fib_select_multipath(res, h);
    2030             :         }
    2031             : #endif
    2032             : 
    2033             :         /* create a routing cache entry */
    2034           0 :         return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
    2035             : }
    2036             : 
    2037             : /* Implements the same saddr-related checks as ip_route_input_slow(),
    2038             :  * assuming daddr is valid and the destination is not a local broadcast one.
    2039             :  * Uses the provided hint instead of performing a route lookup.
    2040             :  */
    2041          49 : int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
    2042             :                       u8 tos, struct net_device *dev,
    2043             :                       const struct sk_buff *hint)
    2044             : {
    2045          49 :         struct in_device *in_dev = __in_dev_get_rcu(dev);
    2046          49 :         struct rtable *rt = skb_rtable(hint);
    2047          49 :         struct net *net = dev_net(dev);
    2048          49 :         int err = -EINVAL;
    2049          49 :         u32 tag = 0;
    2050             : 
    2051          49 :         if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
    2052           0 :                 goto martian_source;
    2053             : 
    2054          49 :         if (ipv4_is_zeronet(saddr))
    2055           0 :                 goto martian_source;
    2056             : 
    2057          49 :         if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
    2058           0 :                 goto martian_source;
    2059             : 
    2060          49 :         if (rt->rt_type != RTN_LOCAL)
    2061           0 :                 goto skip_validate_source;
    2062             : 
    2063          49 :         tos &= IPTOS_RT_MASK;
    2064          49 :         err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &tag);
    2065          49 :         if (err < 0)
    2066           0 :                 goto martian_source;
    2067             : 
    2068          49 : skip_validate_source:
    2069          49 :         skb_dst_copy(skb, hint);
    2070          49 :         return 0;
    2071             : 
    2072           0 : martian_source:
    2073           0 :         ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
    2074           0 :         return err;
    2075             : }
    2076             : 
    2077             : /*
    2078             :  *      NOTE. We drop all the packets that have local source
    2079             :  *      addresses, because every properly looped-back packet
    2080             :  *      must already have the correct destination attached by the output routine.
    2081             :  *      Changes in the enforced policies must also be applied to
    2082             :  *      ip_route_use_hint().
    2083             :  *
    2084             :  *      This approach solves two big problems:
    2085             :  *      1. Non-simplex devices are handled properly.
    2086             :  *      2. IP spoofing attempts are filtered with a 100% guarantee.
    2087             :  *      Called with rcu_read_lock().
    2088             :  */
    2089             : 
    2090          24 : static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
    2091             :                                u8 tos, struct net_device *dev,
    2092             :                                struct fib_result *res)
    2093             : {
    2094          24 :         struct in_device *in_dev = __in_dev_get_rcu(dev);
    2095          24 :         struct flow_keys *flkeys = NULL, _flkeys;
    2096          24 :         struct net    *net = dev_net(dev);
    2097          24 :         struct ip_tunnel_info *tun_info;
    2098          24 :         int             err = -EINVAL;
    2099          24 :         unsigned int    flags = 0;
    2100          24 :         u32             itag = 0;
    2101          24 :         struct rtable   *rth;
    2102          24 :         struct flowi4   fl4;
    2103          24 :         bool do_cache = true;
    2104             : 
    2105             :         /* IP on this device is disabled. */
    2106             : 
    2107          24 :         if (!in_dev)
    2108           0 :                 goto out;
    2109             : 
    2110             :         /* Check for the most egregious martian addresses, which cannot
    2111             :            be detected by fib_lookup.
    2112             :          */
    2113             : 
    2114          24 :         tun_info = skb_tunnel_info(skb);
    2115          24 :         if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
    2116           0 :                 fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id;
    2117             :         else
    2118          24 :                 fl4.flowi4_tun_key.tun_id = 0;
    2119          24 :         skb_dst_drop(skb);
    2120             : 
    2121          24 :         if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
    2122           0 :                 goto martian_source;
    2123             : 
    2124          24 :         res->fi = NULL;
    2125          24 :         res->table = NULL;
    2126          24 :         if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
    2127           2 :                 goto brd_input;
    2128             : 
    2129             :         /* Accept zero addresses only for limited broadcast;
    2130             :          * I do not even know whether to fix this or not. Waiting for complaints :-)
    2131             :          */
    2132          22 :         if (ipv4_is_zeronet(saddr))
    2133           0 :                 goto martian_source;
    2134             : 
    2135          22 :         if (ipv4_is_zeronet(daddr))
    2136           0 :                 goto martian_destination;
    2137             : 
    2138             :         /* The following code tries to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
    2139             :          * and calls it only once if daddr and/or saddr are loopback addresses
    2140             :          */
    2141          22 :         if (ipv4_is_loopback(daddr)) {
    2142           0 :                 if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
    2143           0 :                         goto martian_destination;
    2144          22 :         } else if (ipv4_is_loopback(saddr)) {
    2145           0 :                 if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
    2146           0 :                         goto martian_source;
    2147             :         }
    2148             : 
    2149             :         /*
    2150             :          *      Now we are ready to route the packet.
    2151             :          */
    2152          22 :         fl4.flowi4_oif = 0;
    2153          22 :         fl4.flowi4_iif = dev->ifindex;
    2154          22 :         fl4.flowi4_mark = skb->mark;
    2155          22 :         fl4.flowi4_tos = tos;
    2156          22 :         fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
    2157          22 :         fl4.flowi4_flags = 0;
    2158          22 :         fl4.daddr = daddr;
    2159          22 :         fl4.saddr = saddr;
    2160          22 :         fl4.flowi4_uid = sock_net_uid(net, NULL);
    2161          22 :         fl4.flowi4_multipath_hash = 0;
    2162             : 
    2163          22 :         if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) {
    2164             :                 flkeys = &_flkeys;
    2165             :         } else {
    2166          22 :                 fl4.flowi4_proto = 0;
    2167          22 :                 fl4.fl4_sport = 0;
    2168          22 :                 fl4.fl4_dport = 0;
    2169             :         }
    2170             : 
    2171          22 :         err = fib_lookup(net, &fl4, res, 0);
    2172          22 :         if (err != 0) {
    2173           0 :                 if (!IN_DEV_FORWARD(in_dev))
    2174           0 :                         err = -EHOSTUNREACH;
    2175           0 :                 goto no_route;
    2176             :         }
    2177             : 
    2178          22 :         if (res->type == RTN_BROADCAST) {
    2179           0 :                 if (IN_DEV_BFORWARD(in_dev))
    2180           0 :                         goto make_route;
    2181             :                 /* do not cache if bc_forwarding is enabled */
    2182           0 :                 if (IPV4_DEVCONF_ALL(net, BC_FORWARDING))
    2183           0 :                         do_cache = false;
    2184           0 :                 goto brd_input;
    2185             :         }
    2186             : 
    2187          22 :         if (res->type == RTN_LOCAL) {
    2188          22 :                 err = fib_validate_source(skb, saddr, daddr, tos,
    2189             :                                           0, dev, in_dev, &itag);
    2190          22 :                 if (err < 0)
    2191           0 :                         goto martian_source;
    2192          22 :                 goto local_input;
    2193             :         }
    2194             : 
    2195           0 :         if (!IN_DEV_FORWARD(in_dev)) {
    2196           0 :                 err = -EHOSTUNREACH;
    2197           0 :                 goto no_route;
    2198             :         }
    2199           0 :         if (res->type != RTN_UNICAST)
    2200           0 :                 goto martian_destination;
    2201             : 
    2202           0 : make_route:
    2203           0 :         err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
    2204          24 : out:    return err;
    2205             : 
    2206           2 : brd_input:
    2207           2 :         if (skb->protocol != htons(ETH_P_IP))
    2208           0 :                 goto e_inval;
    2209             : 
    2210           2 :         if (!ipv4_is_zeronet(saddr)) {
    2211           2 :                 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
    2212             :                                           in_dev, &itag);
    2213           2 :                 if (err < 0)
    2214           0 :                         goto martian_source;
    2215             :         }
    2216           2 :         flags |= RTCF_BROADCAST;
    2217           2 :         res->type = RTN_BROADCAST;
    2218           2 :         RT_CACHE_STAT_INC(in_brd);
    2219             : 
    2220          24 : local_input:
    2221          24 :         do_cache &= res->fi && !itag;
    2222          24 :         if (do_cache) {
    2223          22 :                 struct fib_nh_common *nhc = FIB_RES_NHC(*res);
    2224             : 
    2225          22 :                 rth = rcu_dereference(nhc->nhc_rth_input);
    2226          22 :                 if (rt_cache_valid(rth)) {
    2227          21 :                         skb_dst_set_noref(skb, &rth->dst);
    2228          21 :                         err = 0;
    2229          21 :                         goto out;
    2230             :                 }
    2231             :         }
    2232             : 
    2233           9 :         rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
    2234           3 :                            flags | RTCF_LOCAL, res->type,
    2235           3 :                            IN_DEV_ORCONF(in_dev, NOPOLICY), false);
    2236           3 :         if (!rth)
    2237           0 :                 goto e_nobufs;
    2238             : 
    2239           3 :         rth->dst.output = ip_rt_bug;
    2240             : #ifdef CONFIG_IP_ROUTE_CLASSID
    2241             :         rth->dst.tclassid = itag;
    2242             : #endif
    2243           3 :         rth->rt_is_input = 1;
    2244             : 
    2245           3 :         RT_CACHE_STAT_INC(in_slow_tot);
    2246           3 :         if (res->type == RTN_UNREACHABLE) {
    2247           0 :                 rth->dst.input = ip_error;
    2248           0 :                 rth->dst.error = -err;
    2249           0 :                 rth->rt_flags &= ~RTCF_LOCAL;
    2250             :         }
    2251             : 
    2252           3 :         if (do_cache) {
    2253           1 :                 struct fib_nh_common *nhc = FIB_RES_NHC(*res);
    2254             : 
    2255           1 :                 rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
    2256           1 :                 if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
    2257             :                         WARN_ON(rth->dst.input == lwtunnel_input);
    2258             :                         rth->dst.lwtstate->orig_input = rth->dst.input;
    2259             :                         rth->dst.input = lwtunnel_input;
    2260             :                 }
    2261             : 
    2262           1 :                 if (unlikely(!rt_cache_route(nhc, rth)))
    2263           0 :                         rt_add_uncached_list(rth);
    2264             :         }
    2265           3 :         skb_dst_set(skb, &rth->dst);
    2266           3 :         err = 0;
    2267           3 :         goto out;
    2268             : 
    2269           0 : no_route:
    2270           0 :         RT_CACHE_STAT_INC(in_no_route);
    2271           0 :         res->type = RTN_UNREACHABLE;
    2272           0 :         res->fi = NULL;
    2273           0 :         res->table = NULL;
    2274           0 :         goto local_input;
    2275             : 
    2276             :         /*
    2277             :          *      Do not cache martian addresses: they should be logged (RFC1812)
    2278             :          */
    2279           0 : martian_destination:
    2280           0 :         RT_CACHE_STAT_INC(in_martian_dst);
    2281             : #ifdef CONFIG_IP_ROUTE_VERBOSE
    2282             :         if (IN_DEV_LOG_MARTIANS(in_dev))
    2283             :                 net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
    2284             :                                      &daddr, &saddr, dev->name);
    2285             : #endif
    2286             : 
    2287           0 : e_inval:
    2288           0 :         err = -EINVAL;
    2289           0 :         goto out;
    2290             : 
    2291           0 : e_nobufs:
    2292           0 :         err = -ENOBUFS;
    2293           0 :         goto out;
    2294             : 
    2295           0 : martian_source:
    2296           0 :         ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
    2297           0 :         goto out;
    2298             : }
    2299             : 
    2300          24 : int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
    2301             :                          u8 tos, struct net_device *dev)
    2302             : {
    2303          24 :         struct fib_result res;
    2304          24 :         int err;
    2305             : 
    2306          24 :         tos &= IPTOS_RT_MASK;
    2307          24 :         rcu_read_lock();
    2308          24 :         err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
    2309          24 :         rcu_read_unlock();
    2310             : 
    2311          24 :         return err;
    2312             : }
    2313             : EXPORT_SYMBOL(ip_route_input_noref);
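A hedged sketch of how the receive path typically hands an incoming packet to this resolver. It is modelled loosely on what ip_rcv_finish() does; the function name, includes and error handling here are illustrative assumptions, not code from this file.

#include <linux/ip.h>
#include <linux/netdevice.h>
#include <net/dst.h>
#include <net/route.h>

/* Illustrative receive-path caller (hypothetical helper name). */
static int example_rcv_finish(struct sk_buff *skb, struct net_device *dev)
{
	const struct iphdr *iph = ip_hdr(skb);
	int err;

	/* Attach an input route (dst) to the skb; dst_input() then dispatches
	 * to ip_local_deliver(), ip_forward(), etc. via rth->dst.input.
	 */
	err = ip_route_input_noref(skb, iph->daddr, iph->saddr, iph->tos, dev);
	if (unlikely(err))
		return NET_RX_DROP;

	return dst_input(skb);
}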
    2314             : 
    2315             : /* called with rcu_read_lock held */
    2316          24 : int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
    2317             :                        u8 tos, struct net_device *dev, struct fib_result *res)
    2318             : {
    2319             :         /* Multicast recognition logic was moved from the route cache to here.
    2320             :            The problem was that too many Ethernet cards have broken/missing
    2321             :            hardware multicast filters :-( As a result, a host on a multicast
    2322             :            network acquires a lot of useless route cache entries, e.g. for
    2323             :            SDR messages from all over the world. Now we try to get rid of them.
    2324             :            Provided the software IP multicast filter is organized
    2325             :            reasonably (at least, hashed), it does not result in a slowdown
    2326             :            compared with route cache reject entries.
    2327             :            Note that multicast routers are not affected, because
    2328             :            a route cache entry is created eventually.
    2329             :          */
    2330          24 :         if (ipv4_is_multicast(daddr)) {
    2331           0 :                 struct in_device *in_dev = __in_dev_get_rcu(dev);
    2332           0 :                 int our = 0;
    2333           0 :                 int err = -EINVAL;
    2334             : 
    2335           0 :                 if (!in_dev)
    2336             :                         return err;
    2337           0 :                 our = ip_check_mc_rcu(in_dev, daddr, saddr,
    2338           0 :                                       ip_hdr(skb)->protocol);
    2339             : 
    2340             :                 /* check l3 master if no match yet */
    2341           0 :                 if (!our && netif_is_l3_slave(dev)) {
    2342           0 :                         struct in_device *l3_in_dev;
    2343             : 
    2344           0 :                         l3_in_dev = __in_dev_get_rcu(skb->dev);
    2345           0 :                         if (l3_in_dev)
    2346           0 :                                 our = ip_check_mc_rcu(l3_in_dev, daddr, saddr,
    2347           0 :                                                       ip_hdr(skb)->protocol);
    2348             :                 }
    2349             : 
    2350           0 :                 if (our
    2351             : #ifdef CONFIG_IP_MROUTE
    2352             :                         ||
    2353             :                     (!ipv4_is_local_multicast(daddr) &&
    2354             :                      IN_DEV_MFORWARD(in_dev))
    2355             : #endif
    2356             :                    ) {
    2357           0 :                         err = ip_route_input_mc(skb, daddr, saddr,
    2358             :                                                 tos, dev, our);
    2359             :                 }
    2360           0 :                 return err;
    2361             :         }
    2362             : 
    2363          24 :         return ip_route_input_slow(skb, daddr, saddr, tos, dev, res);
    2364             : }
    2365             : 
    2366             : /* called with rcu_read_lock() */
    2367          38 : static struct rtable *__mkroute_output(const struct fib_result *res,
    2368             :                                        const struct flowi4 *fl4, int orig_oif,
    2369             :                                        struct net_device *dev_out,
    2370             :                                        unsigned int flags)
    2371             : {
    2372          38 :         struct fib_info *fi = res->fi;
    2373          38 :         struct fib_nh_exception *fnhe;
    2374          38 :         struct in_device *in_dev;
    2375          38 :         u16 type = res->type;
    2376          38 :         struct rtable *rth;
    2377          38 :         bool do_cache;
    2378             : 
    2379          38 :         in_dev = __in_dev_get_rcu(dev_out);
    2380          38 :         if (!in_dev)
    2381          38 :                 return ERR_PTR(-EINVAL);
    2382             : 
    2383          38 :         if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
    2384          38 :                 if (ipv4_is_loopback(fl4->saddr) &&
    2385          12 :                     !(dev_out->flags & IFF_LOOPBACK) &&
    2386           0 :                     !netif_is_l3_master(dev_out))
    2387          38 :                         return ERR_PTR(-EINVAL);
    2388             : 
    2389          38 :         if (ipv4_is_lbcast(fl4->daddr))
    2390             :                 type = RTN_BROADCAST;
    2391          38 :         else if (ipv4_is_multicast(fl4->daddr))
    2392             :                 type = RTN_MULTICAST;
    2393          38 :         else if (ipv4_is_zeronet(fl4->daddr))
    2394          38 :                 return ERR_PTR(-EINVAL);
    2395             : 
    2396          38 :         if (dev_out->flags & IFF_LOOPBACK)
    2397          12 :                 flags |= RTCF_LOCAL;
    2398             : 
    2399          38 :         do_cache = true;
    2400          38 :         if (type == RTN_BROADCAST) {
    2401           0 :                 flags |= RTCF_BROADCAST | RTCF_LOCAL;
    2402           0 :                 fi = NULL;
    2403          38 :         } else if (type == RTN_MULTICAST) {
    2404           0 :                 flags |= RTCF_MULTICAST | RTCF_LOCAL;
    2405           0 :                 if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
    2406           0 :                                      fl4->flowi4_proto))
    2407           0 :                         flags &= ~RTCF_LOCAL;
    2408             :                 else
    2409             :                         do_cache = false;
    2410             :                 /* If no multicast route exists, use the
    2411             :                  * default one, but do not gateway in this case.
    2412             :                  * Yes, it is a hack.
    2413             :                  */
    2414           0 :                 if (fi && res->prefixlen < 4)
    2415           0 :                         fi = NULL;
    2416          38 :         } else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
    2417           6 :                    (orig_oif != dev_out->ifindex)) {
    2418             :                 /* For local routes that require a particular output interface
    2419             :                  * we do not want to cache the result.  Caching the result
    2420             :                  * causes incorrect behaviour when there are multiple source
    2421             :                  * addresses on the interface: if the intended recipient is
    2422             :                  * waiting on that interface for the packet, it won't receive it,
    2423             :                  * because the packet will be delivered on the loopback
    2424             :                  * interface and the IP_PKTINFO ipi_ifindex will be set
    2425             :                  * to the loopback interface as well.
    2426             :                  */
    2427           0 :                 do_cache = false;
    2428             :         }
    2429             : 
    2430          38 :         fnhe = NULL;
    2431          38 :         do_cache &= fi != NULL;
    2432          38 :         if (fi) {
    2433          32 :                 struct fib_nh_common *nhc = FIB_RES_NHC(*res);
    2434          32 :                 struct rtable __rcu **prth;
    2435             : 
    2436          32 :                 fnhe = find_exception(nhc, fl4->daddr);
    2437          32 :                 if (!do_cache)
    2438           0 :                         goto add;
    2439          32 :                 if (fnhe) {
    2440           0 :                         prth = &fnhe->fnhe_rth_output;
    2441             :                 } else {
    2442          32 :                         if (unlikely(fl4->flowi4_flags &
    2443             :                                      FLOWI_FLAG_KNOWN_NH &&
    2444             :                                      !(nhc->nhc_gw_family &&
    2445             :                                        nhc->nhc_scope == RT_SCOPE_LINK))) {
    2446           0 :                                 do_cache = false;
    2447           0 :                                 goto add;
    2448             :                         }
    2449          32 :                         prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
    2450             :                 }
    2451          32 :                 rth = rcu_dereference(*prth);
    2452          32 :                 if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))
    2453             :                         return rth;
    2454             :         }
    2455             : 
    2456          13 : add:
    2457          13 :         rth = rt_dst_alloc(dev_out, flags, type,
    2458          13 :                            IN_DEV_ORCONF(in_dev, NOPOLICY),
    2459          13 :                            IN_DEV_ORCONF(in_dev, NOXFRM));
    2460          13 :         if (!rth)
    2461          38 :                 return ERR_PTR(-ENOBUFS);
    2462             : 
    2463          13 :         rth->rt_iif = orig_oif;
    2464             : 
    2465          13 :         RT_CACHE_STAT_INC(out_slow_tot);
    2466             : 
    2467          13 :         if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
    2468           0 :                 if (flags & RTCF_LOCAL &&
    2469           0 :                     !(dev_out->flags & IFF_LOOPBACK)) {
    2470           0 :                         rth->dst.output = ip_mc_output;
    2471          13 :                         RT_CACHE_STAT_INC(out_slow_mc);
    2472             :                 }
    2473             : #ifdef CONFIG_IP_MROUTE
    2474             :                 if (type == RTN_MULTICAST) {
    2475             :                         if (IN_DEV_MFORWARD(in_dev) &&
    2476             :                             !ipv4_is_local_multicast(fl4->daddr)) {
    2477             :                                 rth->dst.input = ip_mr_input;
    2478             :                                 rth->dst.output = ip_mc_output;
    2479             :                         }
    2480             :                 }
    2481             : #endif
    2482             :         }
    2483             : 
    2484          13 :         rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache);
    2485          13 :         lwtunnel_set_redirect(&rth->dst);
    2486             : 
    2487          13 :         return rth;
    2488             : }
    2489             : 
    2490             : /*
    2491             :  * Major route resolver routine.
    2492             :  */
    2493             : 
    2494          58 : struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
    2495             :                                         const struct sk_buff *skb)
    2496             : {
    2497          58 :         __u8 tos = RT_FL_TOS(fl4);
    2498          58 :         struct fib_result res = {
    2499             :                 .type           = RTN_UNSPEC,
    2500             :                 .fi             = NULL,
    2501             :                 .table          = NULL,
    2502             :                 .tclassid       = 0,
    2503             :         };
    2504          58 :         struct rtable *rth;
    2505             : 
    2506          58 :         fl4->flowi4_iif = LOOPBACK_IFINDEX;
    2507          58 :         fl4->flowi4_tos = tos & IPTOS_RT_MASK;
    2508          58 :         fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
    2509             :                          RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
    2510             : 
    2511          58 :         rcu_read_lock();
    2512          58 :         rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb);
    2513          58 :         rcu_read_unlock();
    2514             : 
    2515          58 :         return rth;
    2516             : }
    2517             : EXPORT_SYMBOL_GPL(ip_route_output_key_hash);
    2518             : 
    2519          58 : struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
    2520             :                                             struct fib_result *res,
    2521             :                                             const struct sk_buff *skb)
    2522             : {
    2523          58 :         struct net_device *dev_out = NULL;
    2524          58 :         int orig_oif = fl4->flowi4_oif;
    2525          58 :         unsigned int flags = 0;
    2526          58 :         struct rtable *rth;
    2527          58 :         int err;
    2528             : 
    2529          58 :         if (fl4->saddr) {
    2530          25 :                 if (ipv4_is_multicast(fl4->saddr) ||
    2531          25 :                     ipv4_is_lbcast(fl4->saddr) ||
    2532          25 :                     ipv4_is_zeronet(fl4->saddr)) {
    2533           0 :                         rth = ERR_PTR(-EINVAL);
    2534           0 :                         goto out;
    2535             :                 }
    2536             : 
    2537          25 :                 rth = ERR_PTR(-ENETUNREACH);
    2538             : 
    2539             :                 /* I removed the check for oif == dev_out->oif here.
    2540             :                    It was wrong for two reasons:
    2541             :                    1. ip_dev_find(net, saddr) can return the wrong iface if saddr
    2542             :                       is assigned to multiple interfaces.
    2543             :                    2. Moreover, we are allowed to send packets with the saddr
    2544             :                       of another iface. --ANK
    2545             :                  */
    2546             : 
    2547          25 :                 if (fl4->flowi4_oif == 0 &&
    2548          25 :                     (ipv4_is_multicast(fl4->daddr) ||
    2549          25 :                      ipv4_is_lbcast(fl4->daddr))) {
    2550             :                         /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
    2551           0 :                         dev_out = __ip_dev_find(net, fl4->saddr, false);
    2552           0 :                         if (!dev_out)
    2553           0 :                                 goto out;
    2554             : 
    2555             :                         /* Special hack: the user can direct multicasts
    2556             :                            and limited broadcasts via the necessary interface
    2557             :                            without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
    2558             :                            This hack is not just for fun, it allows
    2559             :                            vic, vat and friends to work.
    2560             :                            They bind the socket to loopback, set ttl to zero
    2561             :                            and expect it to work.
    2562             :                            From the viewpoint of the routing cache they are broken,
    2563             :                            because we are not allowed to build a multicast path
    2564             :                            with a loopback source addr (the routing cache
    2565             :                            cannot know that ttl is zero, so the packet
    2566             :                            will not leave this host and the route is valid).
    2567             :                            Luckily, this hack is a good workaround.
    2568             :                          */
    2569             : 
    2570           0 :                         fl4->flowi4_oif = dev_out->ifindex;
    2571           0 :                         goto make_route;
    2572             :                 }
    2573             : 
    2574          25 :                 if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
    2575             :                         /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
    2576          25 :                         if (!__ip_dev_find(net, fl4->saddr, false))
    2577           0 :                                 goto out;
    2578             :                 }
    2579             :         }
    2580             : 
    2581             : 
    2582          58 :         if (fl4->flowi4_oif) {
    2583           0 :                 dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
    2584           0 :                 rth = ERR_PTR(-ENODEV);
    2585           0 :                 if (!dev_out)
    2586           0 :                         goto out;
    2587             : 
    2588             :                 /* RACE: Check return value of inet_select_addr instead. */
    2589           0 :                 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
    2590           0 :                         rth = ERR_PTR(-ENETUNREACH);
    2591           0 :                         goto out;
    2592             :                 }
    2593           0 :                 if (ipv4_is_local_multicast(fl4->daddr) ||
    2594           0 :                     ipv4_is_lbcast(fl4->daddr) ||
    2595           0 :                     fl4->flowi4_proto == IPPROTO_IGMP) {
    2596           0 :                         if (!fl4->saddr)
    2597           0 :                                 fl4->saddr = inet_select_addr(dev_out, 0,
    2598             :                                                               RT_SCOPE_LINK);
    2599           0 :                         goto make_route;
    2600             :                 }
    2601           0 :                 if (!fl4->saddr) {
    2602           0 :                         if (ipv4_is_multicast(fl4->daddr))
    2603           0 :                                 fl4->saddr = inet_select_addr(dev_out, 0,
    2604           0 :                                                               fl4->flowi4_scope);
    2605           0 :                         else if (!fl4->daddr)
    2606           0 :                                 fl4->saddr = inet_select_addr(dev_out, 0,
    2607             :                                                               RT_SCOPE_HOST);
    2608             :                 }
    2609             :         }
    2610             : 
    2611          58 :         if (!fl4->daddr) {
    2612           6 :                 fl4->daddr = fl4->saddr;
    2613           6 :                 if (!fl4->daddr)
    2614           6 :                         fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
    2615           6 :                 dev_out = net->loopback_dev;
    2616           6 :                 fl4->flowi4_oif = LOOPBACK_IFINDEX;
    2617           6 :                 res->type = RTN_LOCAL;
    2618           6 :                 flags |= RTCF_LOCAL;
    2619           6 :                 goto make_route;
    2620             :         }
    2621             : 
    2622          52 :         err = fib_lookup(net, fl4, res, 0);
    2623          52 :         if (err) {
    2624          20 :                 res->fi = NULL;
    2625          20 :                 res->table = NULL;
    2626          20 :                 if (fl4->flowi4_oif &&
    2627           0 :                     (ipv4_is_multicast(fl4->daddr) ||
    2628           0 :                     !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
    2629             :                         /* Apparently, the routing tables are wrong. Assume
    2630             :                            that the destination is on-link.
    2631             : 
    2632             :                            WHY? DW.
    2633             :                            Because we are allowed to send to an iface
    2634             :                            even if it has NO routes and NO assigned
    2635             :                            addresses. When oif is specified, the routing
    2636             :                            tables are looked up with only one purpose:
    2637             :                            to check whether the destination is gatewayed
    2638             :                            rather than direct. Moreover, if MSG_DONTROUTE is set,
    2639             :                            we send the packet, ignoring both the routing tables
    2640             :                            and the ifaddr state. --ANK
    2641             : 
    2642             : 
    2643             :                            We could do this even when oif is unknown,
    2644             :                            as is likely with IPv6, but we do not.
    2645             :                          */
    2646             : 
    2647           0 :                         if (fl4->saddr == 0)
    2648           0 :                                 fl4->saddr = inet_select_addr(dev_out, 0,
    2649             :                                                               RT_SCOPE_LINK);
    2650           0 :                         res->type = RTN_UNICAST;
    2651           0 :                         goto make_route;
    2652             :                 }
    2653          20 :                 rth = ERR_PTR(err);
    2654          20 :                 goto out;
    2655             :         }
    2656             : 
    2657          32 :         if (res->type == RTN_LOCAL) {
    2658           6 :                 if (!fl4->saddr) {
    2659           0 :                         if (res->fi->fib_prefsrc)
    2660           0 :                                 fl4->saddr = res->fi->fib_prefsrc;
    2661             :                         else
    2662           0 :                                 fl4->saddr = fl4->daddr;
    2663             :                 }
    2664             : 
    2665             :                 /* L3 master device is the loopback for that domain */
    2666           6 :                 dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? :
    2667             :                         net->loopback_dev;
    2668             : 
    2669             :                 /* make sure orig_oif points to fib result device even
    2670             :                  * though packet rx/tx happens over loopback or l3mdev
    2671             :                  */
    2672           6 :                 orig_oif = FIB_RES_OIF(*res);
    2673             : 
    2674           6 :                 fl4->flowi4_oif = dev_out->ifindex;
    2675           6 :                 flags |= RTCF_LOCAL;
    2676           6 :                 goto make_route;
    2677             :         }
    2678             : 
    2679          26 :         fib_select_path(net, res, fl4, skb);
    2680             : 
    2681          26 :         dev_out = FIB_RES_DEV(*res);
    2682             : 
    2683          38 : make_route:
    2684          38 :         rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags);
    2685             : 
    2686          58 : out:
    2687          58 :         return rth;
    2688             : }
    2689             : 
    2690           0 : static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
    2691             : {
    2692           0 :         return NULL;
    2693             : }
    2694             : 
    2695           0 : static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
    2696             : {
    2697           0 :         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
    2698             : 
    2699           0 :         return mtu ? : dst->dev->mtu;
    2700             : }
    2701             : 
    2702           0 : static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
    2703             :                                           struct sk_buff *skb, u32 mtu,
    2704             :                                           bool confirm_neigh)
    2705             : {
    2706           0 : }
    2707             : 
    2708           0 : static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
    2709             :                                        struct sk_buff *skb)
    2710             : {
    2711           0 : }
    2712             : 
    2713           0 : static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
    2714             :                                           unsigned long old)
    2715             : {
    2716           0 :         return NULL;
    2717             : }
    2718             : 
    2719             : static struct dst_ops ipv4_dst_blackhole_ops = {
    2720             :         .family                 =       AF_INET,
    2721             :         .check                  =       ipv4_blackhole_dst_check,
    2722             :         .mtu                    =       ipv4_blackhole_mtu,
    2723             :         .default_advmss         =       ipv4_default_advmss,
    2724             :         .update_pmtu            =       ipv4_rt_blackhole_update_pmtu,
    2725             :         .redirect               =       ipv4_rt_blackhole_redirect,
    2726             :         .cow_metrics            =       ipv4_rt_blackhole_cow_metrics,
    2727             :         .neigh_lookup           =       ipv4_neigh_lookup,
    2728             : };
    2729             : 
    2730           0 : struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
    2731             : {
    2732           0 :         struct rtable *ort = (struct rtable *) dst_orig;
    2733           0 :         struct rtable *rt;
    2734             : 
    2735           0 :         rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0);
    2736           0 :         if (rt) {
    2737           0 :                 struct dst_entry *new = &rt->dst;
    2738             : 
    2739           0 :                 new->__use = 1;
    2740           0 :                 new->input = dst_discard;
    2741           0 :                 new->output = dst_discard_out;
    2742             : 
    2743           0 :                 new->dev = net->loopback_dev;
    2744           0 :                 if (new->dev)
    2745           0 :                         dev_hold(new->dev);
    2746             : 
    2747           0 :                 rt->rt_is_input = ort->rt_is_input;
    2748           0 :                 rt->rt_iif = ort->rt_iif;
    2749           0 :                 rt->rt_pmtu = ort->rt_pmtu;
    2750           0 :                 rt->rt_mtu_locked = ort->rt_mtu_locked;
    2751             : 
    2752           0 :                 rt->rt_genid = rt_genid_ipv4(net);
    2753           0 :                 rt->rt_flags = ort->rt_flags;
    2754           0 :                 rt->rt_type = ort->rt_type;
    2755           0 :                 rt->rt_uses_gateway = ort->rt_uses_gateway;
    2756           0 :                 rt->rt_gw_family = ort->rt_gw_family;
    2757           0 :                 if (rt->rt_gw_family == AF_INET)
    2758           0 :                         rt->rt_gw4 = ort->rt_gw4;
    2759           0 :                 else if (rt->rt_gw_family == AF_INET6)
    2760           0 :                         rt->rt_gw6 = ort->rt_gw6;
    2761             : 
    2762           0 :                 INIT_LIST_HEAD(&rt->rt_uncached);
    2763             :         }
    2764             : 
    2765           0 :         dst_release(dst_orig);
    2766             : 
    2767           0 :         return rt ? &rt->dst : ERR_PTR(-ENOMEM);
    2768             : }
    2769             : 
    2770          25 : struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
    2771             :                                     const struct sock *sk)
    2772             : {
    2773          25 :         struct rtable *rt = __ip_route_output_key(net, flp4);
    2774             : 
    2775          25 :         if (IS_ERR(rt))
    2776             :                 return rt;
    2777             : 
    2778          25 :         if (flp4->flowi4_proto) {
    2779          25 :                 flp4->flowi4_oif = rt->dst.dev->ifindex;
    2780          25 :                 rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
    2781          25 :                                                         flowi4_to_flowi(flp4),
    2782             :                                                         sk, 0);
    2783             :         }
    2784             : 
    2785             :         return rt;
    2786             : }
    2787             : EXPORT_SYMBOL_GPL(ip_route_output_flow);
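For completeness, a hedged sketch of an output-route lookup from a kernel caller's point of view. The helper name and the flowi4 fields chosen are illustrative assumptions; real callers often go through flowi4_init_output() or ip_route_output_ports() instead.

#include <linux/err.h>
#include <linux/in.h>
#include <linux/string.h>
#include <net/flow.h>
#include <net/route.h>

/* Illustrative output lookup (hypothetical helper name). */
static int example_output_lookup(struct net *net, __be32 daddr, __be32 saddr)
{
	struct flowi4 fl4;
	struct rtable *rt;

	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = daddr;
	fl4.saddr = saddr;		/* may be zero; the resolver will pick one */
	fl4.flowi4_proto = IPPROTO_UDP;

	rt = ip_route_output_flow(net, &fl4, NULL);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	/* ... build headers and transmit via rt->dst (e.g. ip_local_out()) ... */

	ip_rt_put(rt);			/* drop the reference taken by the lookup */
	return 0;
}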
    2788             : 
    2789           0 : struct rtable *ip_route_output_tunnel(struct sk_buff *skb,
    2790             :                                       struct net_device *dev,
    2791             :                                       struct net *net, __be32 *saddr,
    2792             :                                       const struct ip_tunnel_info *info,
    2793             :                                       u8 protocol, bool use_cache)
    2794             : {
    2795             : #ifdef CONFIG_DST_CACHE
    2796             :         struct dst_cache *dst_cache;
    2797             : #endif
    2798           0 :         struct rtable *rt = NULL;
    2799           0 :         struct flowi4 fl4;
    2800           0 :         __u8 tos;
    2801             : 
    2802             : #ifdef CONFIG_DST_CACHE
    2803             :         dst_cache = (struct dst_cache *)&info->dst_cache;
    2804             :         if (use_cache) {
    2805             :                 rt = dst_cache_get_ip4(dst_cache, saddr);
    2806             :                 if (rt)
    2807             :                         return rt;
    2808             :         }
    2809             : #endif
    2810           0 :         memset(&fl4, 0, sizeof(fl4));
    2811           0 :         fl4.flowi4_mark = skb->mark;
    2812           0 :         fl4.flowi4_proto = protocol;
    2813           0 :         fl4.daddr = info->key.u.ipv4.dst;
    2814           0 :         fl4.saddr = info->key.u.ipv4.src;
    2815           0 :         tos = info->key.tos;
    2816           0 :         fl4.flowi4_tos = RT_TOS(tos);
    2817             : 
    2818           0 :         rt = ip_route_output_key(net, &fl4);
    2819           0 :         if (IS_ERR(rt)) {
    2820             :                 netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr);
    2821           0 :                 return ERR_PTR(-ENETUNREACH);
    2822             :         }
    2823           0 :         if (rt->dst.dev == dev) { /* is this necessary? */
    2824           0 :                 netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr);
    2825           0 :                 ip_rt_put(rt);
    2826           0 :                 return ERR_PTR(-ELOOP);
    2827             :         }
    2828             : #ifdef CONFIG_DST_CACHE
    2829             :         if (use_cache)
    2830             :                 dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
    2831             : #endif
    2832           0 :         *saddr = fl4.saddr;
    2833           0 :         return rt;
    2834             : }
    2835             : EXPORT_SYMBOL_GPL(ip_route_output_tunnel);
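A hedged caller sketch for the tunnel helper above, grounded only in the signature shown in this file: resolve the outer IPv4 route for an encapsulated packet and learn the outer source address the stack selected. The function name and the IPPROTO_UDP choice are illustrative assumptions.

#include <linux/err.h>
#include <linux/in.h>
#include <net/ip_tunnels.h>
#include <net/route.h>

/* Illustrative tunnel-driver caller (hypothetical helper name). */
static int example_tunnel_route(struct sk_buff *skb, struct net_device *dev,
				struct net *net,
				const struct ip_tunnel_info *info)
{
	struct rtable *rt;
	__be32 saddr;

	rt = ip_route_output_tunnel(skb, dev, net, &saddr, info,
				    IPPROTO_UDP, false /* use_cache */);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	/* ... build the outer IP header with saddr and send via rt->dst ... */

	ip_rt_put(rt);
	return 0;
}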
    2836             : 
    2837             : /* called with rcu_read_lock held */
    2838           0 : static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
    2839             :                         struct rtable *rt, u32 table_id, struct flowi4 *fl4,
    2840             :                         struct sk_buff *skb, u32 portid, u32 seq,
    2841             :                         unsigned int flags)
    2842             : {
    2843           0 :         struct rtmsg *r;
    2844           0 :         struct nlmsghdr *nlh;
    2845           0 :         unsigned long expires = 0;
    2846           0 :         u32 error;
    2847           0 :         u32 metrics[RTAX_MAX];
    2848             : 
    2849           0 :         nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), flags);
    2850           0 :         if (!nlh)
    2851             :                 return -EMSGSIZE;
    2852             : 
    2853           0 :         r = nlmsg_data(nlh);
    2854           0 :         r->rtm_family         = AF_INET;
    2855           0 :         r->rtm_dst_len       = 32;
    2856           0 :         r->rtm_src_len       = 0;
    2857           0 :         r->rtm_tos   = fl4 ? fl4->flowi4_tos : 0;
    2858           0 :         r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT;
    2859           0 :         if (nla_put_u32(skb, RTA_TABLE, table_id))
    2860           0 :                 goto nla_put_failure;
    2861           0 :         r->rtm_type  = rt->rt_type;
    2862           0 :         r->rtm_scope = RT_SCOPE_UNIVERSE;
    2863           0 :         r->rtm_protocol = RTPROT_UNSPEC;
    2864           0 :         r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
    2865           0 :         if (rt->rt_flags & RTCF_NOTIFY)
    2866           0 :                 r->rtm_flags |= RTM_F_NOTIFY;
    2867           0 :         if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
    2868           0 :                 r->rtm_flags |= RTCF_DOREDIRECT;
    2869             : 
    2870           0 :         if (nla_put_in_addr(skb, RTA_DST, dst))
    2871           0 :                 goto nla_put_failure;
    2872           0 :         if (src) {
    2873           0 :                 r->rtm_src_len = 32;
    2874           0 :                 if (nla_put_in_addr(skb, RTA_SRC, src))
    2875           0 :                         goto nla_put_failure;
    2876             :         }
    2877           0 :         if (rt->dst.dev &&
    2878           0 :             nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
    2879           0 :                 goto nla_put_failure;
    2880           0 :         if (rt->dst.lwtstate &&
    2881           0 :             lwtunnel_fill_encap(skb, rt->dst.lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
    2882             :                 goto nla_put_failure;
    2883             : #ifdef CONFIG_IP_ROUTE_CLASSID
    2884             :         if (rt->dst.tclassid &&
    2885             :             nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
    2886             :                 goto nla_put_failure;
    2887             : #endif
    2888           0 :         if (fl4 && !rt_is_input_route(rt) &&
    2889           0 :             fl4->saddr != src) {
    2890           0 :                 if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
    2891           0 :                         goto nla_put_failure;
    2892             :         }
    2893           0 :         if (rt->rt_uses_gateway) {
    2894           0 :                 if (rt->rt_gw_family == AF_INET &&
    2895           0 :                     nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
    2896           0 :                         goto nla_put_failure;
    2897           0 :                 } else if (rt->rt_gw_family == AF_INET6) {
    2898           0 :                         int alen = sizeof(struct in6_addr);
    2899           0 :                         struct nlattr *nla;
    2900           0 :                         struct rtvia *via;
    2901             : 
    2902           0 :                         nla = nla_reserve(skb, RTA_VIA, alen + 2);
    2903           0 :                         if (!nla)
    2904           0 :                                 goto nla_put_failure;
    2905             : 
    2906           0 :                         via = nla_data(nla);
    2907           0 :                         via->rtvia_family = AF_INET6;
    2908           0 :                         memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
    2909             :                 }
    2910             :         }
    2911             : 
    2912           0 :         expires = rt->dst.expires;
    2913           0 :         if (expires) {
    2914           0 :                 unsigned long now = jiffies;
    2915             : 
    2916           0 :                 if (time_before(now, expires))
    2917           0 :                         expires -= now;
    2918             :                 else
    2919             :                         expires = 0;
    2920             :         }
    2921             : 
    2922           0 :         memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
    2923           0 :         if (rt->rt_pmtu && expires)
    2924           0 :                 metrics[RTAX_MTU - 1] = rt->rt_pmtu;
    2925           0 :         if (rt->rt_mtu_locked && expires)
    2926           0 :                 metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU);
    2927           0 :         if (rtnetlink_put_metrics(skb, metrics) < 0)
    2928           0 :                 goto nla_put_failure;
    2929             : 
    2930           0 :         if (fl4) {
    2931           0 :                 if (fl4->flowi4_mark &&
    2932           0 :                     nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
    2933           0 :                         goto nla_put_failure;
    2934             : 
    2935           0 :                 if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
    2936           0 :                     nla_put_u32(skb, RTA_UID,
    2937             :                                 from_kuid_munged(current_user_ns(),
    2938             :                                                  fl4->flowi4_uid)))
    2939           0 :                         goto nla_put_failure;
    2940             : 
    2941           0 :                 if (rt_is_input_route(rt)) {
    2942             : #ifdef CONFIG_IP_MROUTE
    2943             :                         if (ipv4_is_multicast(dst) &&
    2944             :                             !ipv4_is_local_multicast(dst) &&
    2945             :                             IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
    2946             :                                 int err = ipmr_get_route(net, skb,
    2947             :                                                          fl4->saddr, fl4->daddr,
    2948             :                                                          r, portid);
    2949             : 
    2950             :                                 if (err <= 0) {
    2951             :                                         if (err == 0)
    2952             :                                                 return 0;
    2953             :                                         goto nla_put_failure;
    2954             :                                 }
    2955             :                         } else
    2956             : #endif
    2957           0 :                                 if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif))
    2958           0 :                                         goto nla_put_failure;
    2959             :                 }
    2960             :         }
    2961             : 
    2962           0 :         error = rt->dst.error;
    2963             : 
    2964           0 :         if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
    2965           0 :                 goto nla_put_failure;
    2966             : 
    2967           0 :         nlmsg_end(skb, nlh);
    2968           0 :         return 0;
    2969             : 
    2970           0 : nla_put_failure:
    2971           0 :         nlmsg_cancel(skb, nlh);
    2972           0 :         return -EMSGSIZE;
    2973             : }
    2974             : 
    2975           0 : static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb,
    2976             :                             struct netlink_callback *cb, u32 table_id,
    2977             :                             struct fnhe_hash_bucket *bucket, int genid,
    2978             :                             int *fa_index, int fa_start, unsigned int flags)
    2979             : {
    2980           0 :         int i;
    2981             : 
    2982           0 :         for (i = 0; i < FNHE_HASH_SIZE; i++) {
    2983           0 :                 struct fib_nh_exception *fnhe;
    2984             : 
    2985           0 :                 for (fnhe = rcu_dereference(bucket[i].chain); fnhe;
    2986           0 :                      fnhe = rcu_dereference(fnhe->fnhe_next)) {
    2987           0 :                         struct rtable *rt;
    2988           0 :                         int err;
    2989             : 
    2990           0 :                         if (*fa_index < fa_start)
    2991           0 :                                 goto next;
    2992             : 
    2993           0 :                         if (fnhe->fnhe_genid != genid)
    2994           0 :                                 goto next;
    2995             : 
    2996           0 :                         if (fnhe->fnhe_expires &&
    2997           0 :                             time_after(jiffies, fnhe->fnhe_expires))
    2998           0 :                                 goto next;
    2999             : 
    3000           0 :                         rt = rcu_dereference(fnhe->fnhe_rth_input);
    3001           0 :                         if (!rt)
    3002           0 :                                 rt = rcu_dereference(fnhe->fnhe_rth_output);
    3003           0 :                         if (!rt)
    3004           0 :                                 goto next;
    3005             : 
    3006           0 :                         err = rt_fill_info(net, fnhe->fnhe_daddr, 0, rt,
    3007             :                                            table_id, NULL, skb,
    3008           0 :                                            NETLINK_CB(cb->skb).portid,
    3009           0 :                                            cb->nlh->nlmsg_seq, flags);
    3010           0 :                         if (err)
    3011           0 :                                 return err;
    3012           0 : next:
    3013           0 :                         (*fa_index)++;
    3014             :                 }
    3015             :         }
    3016             : 
    3017             :         return 0;
    3018             : }
    3019             : 
    3020           9 : int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb,
    3021             :                        u32 table_id, struct fib_info *fi,
    3022             :                        int *fa_index, int fa_start, unsigned int flags)
    3023             : {
    3024           9 :         struct net *net = sock_net(cb->skb->sk);
    3025           9 :         int nhsel, genid = fnhe_genid(net);
    3026             : 
    3027          36 :         for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
    3028           9 :                 struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel);
    3029           9 :                 struct fnhe_hash_bucket *bucket;
    3030           9 :                 int err;
    3031             : 
    3032           9 :                 if (nhc->nhc_flags & RTNH_F_DEAD)
    3033           0 :                         continue;
    3034             : 
    3035           9 :                 rcu_read_lock();
    3036           9 :                 bucket = rcu_dereference(nhc->nhc_exceptions);
    3037           9 :                 err = 0;
    3038           9 :                 if (bucket)
    3039           0 :                         err = fnhe_dump_bucket(net, skb, cb, table_id, bucket,
    3040             :                                                genid, fa_index, fa_start,
    3041             :                                                flags);
    3042           0 :                 rcu_read_unlock();
    3043           9 :                 if (err)
    3044           0 :                         return err;
    3045             :         }
    3046             : 
    3047             :         return 0;
    3048             : }
    3049             : 
    3050           0 : static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst,
    3051             :                                                    u8 ip_proto, __be16 sport,
    3052             :                                                    __be16 dport)
    3053             : {
    3054           0 :         struct sk_buff *skb;
    3055           0 :         struct iphdr *iph;
    3056             : 
    3057           0 :         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
    3058           0 :         if (!skb)
    3059             :                 return NULL;
    3060             : 
    3061             :         /* Reserve room for dummy headers; this skb can pass
    3062             :          * through a good chunk of the routing engine.
    3063             :          */
    3064           0 :         skb_reset_mac_header(skb);
    3065           0 :         skb_reset_network_header(skb);
    3066           0 :         skb->protocol = htons(ETH_P_IP);
    3067           0 :         iph = skb_put(skb, sizeof(struct iphdr));
    3068           0 :         iph->protocol = ip_proto;
    3069           0 :         iph->saddr = src;
    3070           0 :         iph->daddr = dst;
    3071           0 :         iph->version = 0x4;
    3072           0 :         iph->frag_off = 0;
    3073           0 :         iph->ihl = 0x5;
    3074           0 :         skb_set_transport_header(skb, skb->len);
    3075             : 
    3076           0 :         switch (iph->protocol) {
    3077           0 :         case IPPROTO_UDP: {
    3078           0 :                 struct udphdr *udph;
    3079             : 
    3080           0 :                 udph = skb_put_zero(skb, sizeof(struct udphdr));
    3081           0 :                 udph->source = sport;
    3082           0 :                 udph->dest = dport;
    3083           0 :                 udph->len = sizeof(struct udphdr);
    3084           0 :                 udph->check = 0;
    3085           0 :                 break;
    3086             :         }
    3087           0 :         case IPPROTO_TCP: {
    3088           0 :                 struct tcphdr *tcph;
    3089             : 
    3090           0 :                 tcph = skb_put_zero(skb, sizeof(struct tcphdr));
    3091           0 :                 tcph->source = sport;
    3092           0 :                 tcph->dest   = dport;
    3093           0 :                 tcph->doff   = sizeof(struct tcphdr) / 4;
    3094           0 :                 tcph->rst = 1;
    3095           0 :                 tcph->check = ~tcp_v4_check(sizeof(struct tcphdr),
    3096             :                                             src, dst, 0);
    3097           0 :                 break;
    3098             :         }
    3099           0 :         case IPPROTO_ICMP: {
    3100           0 :                 struct icmphdr *icmph;
    3101             : 
    3102           0 :                 icmph = skb_put_zero(skb, sizeof(struct icmphdr));
    3103           0 :                 icmph->type = ICMP_ECHO;
    3104           0 :                 icmph->code = 0;
    3105             :         }
    3106             :         }
    3107             : 
    3108             :         return skb;
    3109             : }
    3110             : 
    3111           0 : static int inet_rtm_valid_getroute_req(struct sk_buff *skb,
    3112             :                                        const struct nlmsghdr *nlh,
    3113             :                                        struct nlattr **tb,
    3114             :                                        struct netlink_ext_ack *extack)
    3115             : {
    3116           0 :         struct rtmsg *rtm;
    3117           0 :         int i, err;
    3118             : 
    3119           0 :         if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
    3120           0 :                 NL_SET_ERR_MSG(extack,
    3121             :                                "ipv4: Invalid header for route get request");
    3122           0 :                 return -EINVAL;
    3123             :         }
    3124             : 
    3125           0 :         if (!netlink_strict_get_check(skb))
    3126           0 :                 return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
    3127             :                                               rtm_ipv4_policy, extack);
    3128             : 
    3129           0 :         rtm = nlmsg_data(nlh);
    3130           0 :         if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) ||
    3131           0 :             (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) ||
    3132             :             rtm->rtm_table || rtm->rtm_protocol ||
    3133           0 :             rtm->rtm_scope || rtm->rtm_type) {
    3134           0 :                 NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for route get request");
    3135           0 :                 return -EINVAL;
    3136             :         }
    3137             : 
    3138           0 :         if (rtm->rtm_flags & ~(RTM_F_NOTIFY |
    3139             :                                RTM_F_LOOKUP_TABLE |
    3140             :                                RTM_F_FIB_MATCH)) {
    3141           0 :                 NL_SET_ERR_MSG(extack, "ipv4: Unsupported rtm_flags for route get request");
    3142           0 :                 return -EINVAL;
    3143             :         }
    3144             : 
    3145           0 :         err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
    3146             :                                             rtm_ipv4_policy, extack);
    3147           0 :         if (err)
    3148             :                 return err;
    3149             : 
    3150           0 :         if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
    3151           0 :             (tb[RTA_DST] && !rtm->rtm_dst_len)) {
    3152           0 :                 NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4");
    3153           0 :                 return -EINVAL;
    3154             :         }
    3155             : 
    3156           0 :         for (i = 0; i <= RTA_MAX; i++) {
    3157           0 :                 if (!tb[i])
    3158           0 :                         continue;
    3159             : 
    3160           0 :                 switch (i) {
    3161             :                 case RTA_IIF:
    3162             :                 case RTA_OIF:
    3163             :                 case RTA_SRC:
    3164             :                 case RTA_DST:
    3165             :                 case RTA_IP_PROTO:
    3166             :                 case RTA_SPORT:
    3167             :                 case RTA_DPORT:
    3168             :                 case RTA_MARK:
    3169             :                 case RTA_UID:
    3170             :                         break;
    3171           0 :                 default:
    3172           0 :                         NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in route get request");
    3173             :                         return -EINVAL;
    3174             :                 }
    3175             :         }
    3176             : 
    3177             :         return 0;
    3178             : }
    3179             : 
    3180           0 : static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
    3181             :                              struct netlink_ext_ack *extack)
    3182             : {
    3183           0 :         struct net *net = sock_net(in_skb->sk);
    3184           0 :         struct nlattr *tb[RTA_MAX+1];
    3185           0 :         u32 table_id = RT_TABLE_MAIN;
    3186           0 :         __be16 sport = 0, dport = 0;
    3187           0 :         struct fib_result res = {};
    3188           0 :         u8 ip_proto = IPPROTO_UDP;
    3189           0 :         struct rtable *rt = NULL;
    3190           0 :         struct sk_buff *skb;
    3191           0 :         struct rtmsg *rtm;
    3192           0 :         struct flowi4 fl4 = {};
    3193           0 :         __be32 dst = 0;
    3194           0 :         __be32 src = 0;
    3195           0 :         kuid_t uid;
    3196           0 :         u32 iif;
    3197           0 :         int err;
    3198           0 :         int mark;
    3199             : 
    3200           0 :         err = inet_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
    3201           0 :         if (err < 0)
    3202             :                 return err;
    3203             : 
    3204           0 :         rtm = nlmsg_data(nlh);
    3205           0 :         src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
    3206           0 :         dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
    3207           0 :         iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
    3208           0 :         mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
    3209           0 :         if (tb[RTA_UID])
    3210           0 :                 uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
    3211             :         else
    3212           0 :                 uid = (iif ? INVALID_UID : current_uid());
    3213             : 
    3214           0 :         if (tb[RTA_IP_PROTO]) {
    3215           0 :                 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
    3216             :                                                   &ip_proto, AF_INET, extack);
    3217           0 :                 if (err)
    3218             :                         return err;
    3219             :         }
    3220             : 
    3221           0 :         if (tb[RTA_SPORT])
    3222           0 :                 sport = nla_get_be16(tb[RTA_SPORT]);
    3223             : 
    3224           0 :         if (tb[RTA_DPORT])
    3225           0 :                 dport = nla_get_be16(tb[RTA_DPORT]);
    3226             : 
    3227           0 :         skb = inet_rtm_getroute_build_skb(src, dst, ip_proto, sport, dport);
    3228           0 :         if (!skb)
    3229             :                 return -ENOBUFS;
    3230             : 
    3231           0 :         fl4.daddr = dst;
    3232           0 :         fl4.saddr = src;
    3233           0 :         fl4.flowi4_tos = rtm->rtm_tos & IPTOS_RT_MASK;
    3234           0 :         fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
    3235           0 :         fl4.flowi4_mark = mark;
    3236           0 :         fl4.flowi4_uid = uid;
    3237           0 :         if (sport)
    3238           0 :                 fl4.fl4_sport = sport;
    3239           0 :         if (dport)
    3240           0 :                 fl4.fl4_dport = dport;
    3241           0 :         fl4.flowi4_proto = ip_proto;
    3242             : 
    3243           0 :         rcu_read_lock();
    3244             : 
    3245           0 :         if (iif) {
    3246           0 :                 struct net_device *dev;
    3247             : 
    3248           0 :                 dev = dev_get_by_index_rcu(net, iif);
    3249           0 :                 if (!dev) {
    3250           0 :                         err = -ENODEV;
    3251           0 :                         goto errout_rcu;
    3252             :                 }
    3253             : 
    3254           0 :                 fl4.flowi4_iif = iif; /* for rt_fill_info */
    3255           0 :                 skb->dev     = dev;
    3256           0 :                 skb->mark    = mark;
    3257           0 :                 err = ip_route_input_rcu(skb, dst, src,
    3258           0 :                                          rtm->rtm_tos & IPTOS_RT_MASK, dev,
    3259             :                                          &res);
    3260             : 
    3261           0 :                 rt = skb_rtable(skb);
    3262           0 :                 if (err == 0 && rt->dst.error)
    3263           0 :                         err = -rt->dst.error;
    3264             :         } else {
    3265           0 :                 fl4.flowi4_iif = LOOPBACK_IFINDEX;
    3266           0 :                 skb->dev = net->loopback_dev;
    3267           0 :                 rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
    3268           0 :                 err = 0;
    3269           0 :                 if (IS_ERR(rt))
    3270           0 :                         err = PTR_ERR(rt);
    3271             :                 else
    3272           0 :                         skb_dst_set(skb, &rt->dst);
    3273             :         }
    3274             : 
    3275           0 :         if (err)
    3276           0 :                 goto errout_rcu;
    3277             : 
    3278           0 :         if (rtm->rtm_flags & RTM_F_NOTIFY)
    3279           0 :                 rt->rt_flags |= RTCF_NOTIFY;
    3280             : 
    3281           0 :         if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
    3282           0 :                 table_id = res.table ? res.table->tb_id : 0;
    3283             : 
    3284             :         /* reset skb for netlink reply msg */
    3285           0 :         skb_trim(skb, 0);
    3286           0 :         skb_reset_network_header(skb);
    3287           0 :         skb_reset_transport_header(skb);
    3288           0 :         skb_reset_mac_header(skb);
    3289             : 
    3290           0 :         if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
    3291           0 :                 struct fib_rt_info fri;
    3292             : 
    3293           0 :                 if (!res.fi) {
    3294           0 :                         err = fib_props[res.type].error;
    3295           0 :                         if (!err)
    3296           0 :                                 err = -EHOSTUNREACH;
    3297           0 :                         goto errout_rcu;
    3298             :                 }
    3299           0 :                 fri.fi = res.fi;
    3300           0 :                 fri.tb_id = table_id;
    3301           0 :                 fri.dst = res.prefix;
    3302           0 :                 fri.dst_len = res.prefixlen;
    3303           0 :                 fri.tos = fl4.flowi4_tos;
    3304           0 :                 fri.type = rt->rt_type;
    3305           0 :                 fri.offload = 0;
    3306           0 :                 fri.trap = 0;
    3307           0 :                 fri.offload_failed = 0;
    3308           0 :                 if (res.fa_head) {
    3309           0 :                         struct fib_alias *fa;
    3310             : 
    3311           0 :                         hlist_for_each_entry_rcu(fa, res.fa_head, fa_list) {
    3312           0 :                                 u8 slen = 32 - fri.dst_len;
    3313             : 
    3314           0 :                                 if (fa->fa_slen == slen &&
    3315           0 :                                     fa->tb_id == fri.tb_id &&
    3316           0 :                                     fa->fa_tos == fri.tos &&
    3317           0 :                                     fa->fa_info == res.fi &&
    3318           0 :                                     fa->fa_type == fri.type) {
    3319           0 :                                         fri.offload = fa->offload;
    3320           0 :                                         fri.trap = fa->trap;
    3321           0 :                                         break;
    3322             :                                 }
    3323             :                         }
    3324             :                 }
    3325           0 :                 err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
    3326             :                                     nlh->nlmsg_seq, RTM_NEWROUTE, &fri, 0);
    3327             :         } else {
    3328           0 :                 err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb,
    3329             :                                    NETLINK_CB(in_skb).portid,
    3330             :                                    nlh->nlmsg_seq, 0);
    3331             :         }
    3332           0 :         if (err < 0)
    3333           0 :                 goto errout_rcu;
    3334             : 
    3335           0 :         rcu_read_unlock();
    3336             : 
    3337           0 :         err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
    3338             : 
    3339             : errout_free:
    3340             :         return err;
    3341           0 : errout_rcu:
    3342           0 :         rcu_read_unlock();
    3343           0 :         kfree_skb(skb);
    3344           0 :         goto errout_free;
    3345             : }
    3346             : 
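The two functions above are the doit path for RTM_GETROUTE: inet_rtm_valid_getroute_req() strictly validates the request, inet_rtm_getroute() resolves the flow and answers with either rt_fill_info() or fib_dump_info(). As a rough illustration (not part of route.c, and the destination address is an assumption made for the example), the sketch below builds the kind of userspace request this handler services, using only attributes the strict validator accepts (a single RTA_DST):

/* Illustrative sketch only -- not shipped with route.c. */
#include <arpa/inet.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

static int query_ipv4_route(const char *dst_ip)
{
	struct {
		struct nlmsghdr nlh;
		struct rtmsg rtm;
		char attrs[64];
	} req;
	struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
	struct rtattr *rta;
	char reply[4096];
	int fd, ret = -1;

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
	req.nlh.nlmsg_type = RTM_GETROUTE;
	req.nlh.nlmsg_flags = NLM_F_REQUEST;
	req.rtm.rtm_family = AF_INET;
	req.rtm.rtm_dst_len = 32;	/* required once RTA_DST is present */

	/* Append RTA_DST carrying the IPv4 destination address. */
	rta = (struct rtattr *)((char *)&req + NLMSG_ALIGN(req.nlh.nlmsg_len));
	rta->rta_type = RTA_DST;
	rta->rta_len = RTA_LENGTH(sizeof(struct in_addr));
	if (inet_pton(AF_INET, dst_ip, RTA_DATA(rta)) != 1)
		return -1;
	req.nlh.nlmsg_len = NLMSG_ALIGN(req.nlh.nlmsg_len) + rta->rta_len;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd < 0)
		return -1;
	if (sendto(fd, &req, req.nlh.nlmsg_len, 0,
		   (struct sockaddr *)&kernel, sizeof(kernel)) >= 0 &&
	    recv(fd, reply, sizeof(reply), 0) > 0)
		ret = 0;	/* reply is the RTM_NEWROUTE built by rt_fill_info() */
	close(fd);
	return ret;
}

Functionally this mirrors "ip route get <addr>"; setting RTM_F_FIB_MATCH in rtm_flags would instead exercise the fib_dump_info() branch seen above.
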
    3347           2 : void ip_rt_multicast_event(struct in_device *in_dev)
    3348             : {
    3349           2 :         rt_cache_flush(dev_net(in_dev->dev));
    3350           2 : }
    3351             : 
    3352             : #ifdef CONFIG_SYSCTL
    3353             : static int ip_rt_gc_interval __read_mostly  = 60 * HZ;
    3354             : static int ip_rt_gc_min_interval __read_mostly  = HZ / 2;
    3355             : static int ip_rt_gc_elasticity __read_mostly    = 8;
    3356             : static int ip_min_valid_pmtu __read_mostly      = IPV4_MIN_MTU;
    3357             : 
    3358           0 : static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
    3359             :                 void *buffer, size_t *lenp, loff_t *ppos)
    3360             : {
    3361           0 :         struct net *net = (struct net *)__ctl->extra1;
    3362             : 
    3363           0 :         if (write) {
    3364           0 :                 rt_cache_flush(net);
    3365           0 :                 fnhe_genid_bump(net);
    3366           0 :                 return 0;
    3367             :         }
    3368             : 
    3369             :         return -EINVAL;
    3370             : }
    3371             : 
    3372             : static struct ctl_table ipv4_route_table[] = {
    3373             :         {
    3374             :                 .procname       = "gc_thresh",
    3375             :                 .data           = &ipv4_dst_ops.gc_thresh,
    3376             :                 .maxlen         = sizeof(int),
    3377             :                 .mode           = 0644,
    3378             :                 .proc_handler   = proc_dointvec,
    3379             :         },
    3380             :         {
    3381             :                 .procname       = "max_size",
    3382             :                 .data           = &ip_rt_max_size,
    3383             :                 .maxlen         = sizeof(int),
    3384             :                 .mode           = 0644,
    3385             :                 .proc_handler   = proc_dointvec,
    3386             :         },
    3387             :         {
    3388             :                 /*  Deprecated. Use gc_min_interval_ms */
    3389             : 
    3390             :                 .procname       = "gc_min_interval",
    3391             :                 .data           = &ip_rt_gc_min_interval,
    3392             :                 .maxlen         = sizeof(int),
    3393             :                 .mode           = 0644,
    3394             :                 .proc_handler   = proc_dointvec_jiffies,
    3395             :         },
    3396             :         {
    3397             :                 .procname       = "gc_min_interval_ms",
    3398             :                 .data           = &ip_rt_gc_min_interval,
    3399             :                 .maxlen         = sizeof(int),
    3400             :                 .mode           = 0644,
    3401             :                 .proc_handler   = proc_dointvec_ms_jiffies,
    3402             :         },
    3403             :         {
    3404             :                 .procname       = "gc_timeout",
    3405             :                 .data           = &ip_rt_gc_timeout,
    3406             :                 .maxlen         = sizeof(int),
    3407             :                 .mode           = 0644,
    3408             :                 .proc_handler   = proc_dointvec_jiffies,
    3409             :         },
    3410             :         {
    3411             :                 .procname       = "gc_interval",
    3412             :                 .data           = &ip_rt_gc_interval,
    3413             :                 .maxlen         = sizeof(int),
    3414             :                 .mode           = 0644,
    3415             :                 .proc_handler   = proc_dointvec_jiffies,
    3416             :         },
    3417             :         {
    3418             :                 .procname       = "redirect_load",
    3419             :                 .data           = &ip_rt_redirect_load,
    3420             :                 .maxlen         = sizeof(int),
    3421             :                 .mode           = 0644,
    3422             :                 .proc_handler   = proc_dointvec,
    3423             :         },
    3424             :         {
    3425             :                 .procname       = "redirect_number",
    3426             :                 .data           = &ip_rt_redirect_number,
    3427             :                 .maxlen         = sizeof(int),
    3428             :                 .mode           = 0644,
    3429             :                 .proc_handler   = proc_dointvec,
    3430             :         },
    3431             :         {
    3432             :                 .procname       = "redirect_silence",
    3433             :                 .data           = &ip_rt_redirect_silence,
    3434             :                 .maxlen         = sizeof(int),
    3435             :                 .mode           = 0644,
    3436             :                 .proc_handler   = proc_dointvec,
    3437             :         },
    3438             :         {
    3439             :                 .procname       = "error_cost",
    3440             :                 .data           = &ip_rt_error_cost,
    3441             :                 .maxlen         = sizeof(int),
    3442             :                 .mode           = 0644,
    3443             :                 .proc_handler   = proc_dointvec,
    3444             :         },
    3445             :         {
    3446             :                 .procname       = "error_burst",
    3447             :                 .data           = &ip_rt_error_burst,
    3448             :                 .maxlen         = sizeof(int),
    3449             :                 .mode           = 0644,
    3450             :                 .proc_handler   = proc_dointvec,
    3451             :         },
    3452             :         {
    3453             :                 .procname       = "gc_elasticity",
    3454             :                 .data           = &ip_rt_gc_elasticity,
    3455             :                 .maxlen         = sizeof(int),
    3456             :                 .mode           = 0644,
    3457             :                 .proc_handler   = proc_dointvec,
    3458             :         },
    3459             :         {
    3460             :                 .procname       = "mtu_expires",
    3461             :                 .data           = &ip_rt_mtu_expires,
    3462             :                 .maxlen         = sizeof(int),
    3463             :                 .mode           = 0644,
    3464             :                 .proc_handler   = proc_dointvec_jiffies,
    3465             :         },
    3466             :         {
    3467             :                 .procname       = "min_pmtu",
    3468             :                 .data           = &ip_rt_min_pmtu,
    3469             :                 .maxlen         = sizeof(int),
    3470             :                 .mode           = 0644,
    3471             :                 .proc_handler   = proc_dointvec_minmax,
    3472             :                 .extra1         = &ip_min_valid_pmtu,
    3473             :         },
    3474             :         {
    3475             :                 .procname       = "min_adv_mss",
    3476             :                 .data           = &ip_rt_min_advmss,
    3477             :                 .maxlen         = sizeof(int),
    3478             :                 .mode           = 0644,
    3479             :                 .proc_handler   = proc_dointvec,
    3480             :         },
    3481             :         { }
    3482             : };
    3483             : 
    3484             : static const char ipv4_route_flush_procname[] = "flush";
    3485             : 
    3486             : static struct ctl_table ipv4_route_flush_table[] = {
    3487             :         {
    3488             :                 .procname       = ipv4_route_flush_procname,
    3489             :                 .maxlen         = sizeof(int),
    3490             :                 .mode           = 0200,
    3491             :                 .proc_handler   = ipv4_sysctl_rtcache_flush,
    3492             :         },
    3493             :         { },
    3494             : };
    3495             : 
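Both tables end up registered under "net/ipv4/route" (the flush table per netns just below, the main table in ip_static_sysctl_init() at the end of the file), so these knobs surface as files under /proc/sys/net/ipv4/route/. As a rough illustration (not part of route.c; it assumes procfs is mounted at /proc and that the caller has privileges matching the 0200 mode), the sketch below pokes the write-only "flush" entry, which lands in ipv4_sysctl_rtcache_flush() and in turn calls rt_cache_flush() and fnhe_genid_bump():

/* Illustrative sketch only -- not shipped with route.c. */
#include <fcntl.h>
#include <unistd.h>

static int flush_ipv4_route_cache(void)
{
	int fd = open("/proc/sys/net/ipv4/route/flush", O_WRONLY);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = write(fd, "1\n", 2);	/* any write triggers the handler; the value is never inspected */
	close(fd);
	return n == 2 ? 0 : -1;
}

Per the handler above, reading this entry simply returns -EINVAL; only writes do anything.
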
    3496           1 : static __net_init int sysctl_route_net_init(struct net *net)
    3497             : {
    3498           1 :         struct ctl_table *tbl;
    3499             : 
    3500           1 :         tbl = ipv4_route_flush_table;
    3501           1 :         if (!net_eq(net, &init_net)) {
    3502             :                 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
    3503             :                 if (!tbl)
    3504             :                         goto err_dup;
    3505             : 
    3506             :                 /* Don't export non-whitelisted sysctls to unprivileged users */
    3507             :                 if (net->user_ns != &init_user_ns) {
    3508             :                         if (tbl[0].procname != ipv4_route_flush_procname)
    3509             :                                 tbl[0].procname = NULL;
    3510             :                 }
    3511             :         }
    3512           1 :         tbl[0].extra1 = net;
    3513             : 
    3514           1 :         net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
    3515           1 :         if (!net->ipv4.route_hdr)
    3516           0 :                 goto err_reg;
    3517             :         return 0;
    3518             : 
    3519           0 : err_reg:
    3520           0 :         if (tbl != ipv4_route_flush_table)
    3521             :                 kfree(tbl);
    3522           0 : err_dup:
    3523           0 :         return -ENOMEM;
    3524             : }
    3525             : 
    3526           0 : static __net_exit void sysctl_route_net_exit(struct net *net)
    3527             : {
    3528           0 :         struct ctl_table *tbl;
    3529             : 
    3530           0 :         tbl = net->ipv4.route_hdr->ctl_table_arg;
    3531           0 :         unregister_net_sysctl_table(net->ipv4.route_hdr);
    3532           0 :         BUG_ON(tbl == ipv4_route_flush_table);
    3533           0 :         kfree(tbl);
    3534           0 : }
    3535             : 
    3536             : static __net_initdata struct pernet_operations sysctl_route_ops = {
    3537             :         .init = sysctl_route_net_init,
    3538             :         .exit = sysctl_route_net_exit,
    3539             : };
    3540             : #endif
    3541             : 
    3542           1 : static __net_init int rt_genid_init(struct net *net)
    3543             : {
    3544           1 :         atomic_set(&net->ipv4.rt_genid, 0);
    3545           1 :         atomic_set(&net->fnhe_genid, 0);
    3546           1 :         atomic_set(&net->ipv4.dev_addr_genid, get_random_int());
    3547           1 :         return 0;
    3548             : }
    3549             : 
    3550             : static __net_initdata struct pernet_operations rt_genid_ops = {
    3551             :         .init = rt_genid_init,
    3552             : };
    3553             : 
    3554           1 : static int __net_init ipv4_inetpeer_init(struct net *net)
    3555             : {
    3556           1 :         struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
    3557             : 
    3558           1 :         if (!bp)
    3559             :                 return -ENOMEM;
    3560           1 :         inet_peer_base_init(bp);
    3561           1 :         net->ipv4.peers = bp;
    3562           1 :         return 0;
    3563             : }
    3564             : 
    3565           0 : static void __net_exit ipv4_inetpeer_exit(struct net *net)
    3566             : {
    3567           0 :         struct inet_peer_base *bp = net->ipv4.peers;
    3568             : 
    3569           0 :         net->ipv4.peers = NULL;
    3570           0 :         inetpeer_invalidate_tree(bp);
    3571           0 :         kfree(bp);
    3572           0 : }
    3573             : 
    3574             : static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
    3575             :         .init   =       ipv4_inetpeer_init,
    3576             :         .exit   =       ipv4_inetpeer_exit,
    3577             : };
    3578             : 
    3579             : #ifdef CONFIG_IP_ROUTE_CLASSID
    3580             : struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
    3581             : #endif /* CONFIG_IP_ROUTE_CLASSID */
    3582             : 
    3583           1 : int __init ip_rt_init(void)
    3584             : {
    3585           1 :         int cpu;
    3586             : 
    3587           1 :         ip_idents = kmalloc_array(IP_IDENTS_SZ, sizeof(*ip_idents),
    3588             :                                   GFP_KERNEL);
    3589           1 :         if (!ip_idents)
    3590           0 :                 panic("IP: failed to allocate ip_idents\n");
    3591             : 
    3592           1 :         prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
    3593             : 
    3594           1 :         ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
    3595           1 :         if (!ip_tstamps)
    3596           0 :                 panic("IP: failed to allocate ip_tstamps\n");
    3597             : 
    3598           5 :         for_each_possible_cpu(cpu) {
    3599           4 :                 struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
    3600             : 
    3601           4 :                 INIT_LIST_HEAD(&ul->head);
    3602           5 :                 spin_lock_init(&ul->lock);
    3603             :         }
    3604             : #ifdef CONFIG_IP_ROUTE_CLASSID
    3605             :         ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
    3606             :         if (!ip_rt_acct)
    3607             :                 panic("IP: failed to allocate ip_rt_acct\n");
    3608             : #endif
    3609             : 
    3610           2 :         ipv4_dst_ops.kmem_cachep =
    3611           1 :                 kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
    3612             :                                   SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
    3613             : 
    3614           1 :         ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
    3615             : 
    3616           1 :         if (dst_entries_init(&ipv4_dst_ops) < 0)
    3617           0 :                 panic("IP: failed to allocate ipv4_dst_ops counter\n");
    3618             : 
    3619           1 :         if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
    3620           0 :                 panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");
    3621             : 
    3622           1 :         ipv4_dst_ops.gc_thresh = ~0;
    3623           1 :         ip_rt_max_size = INT_MAX;
    3624             : 
    3625           1 :         devinet_init();
    3626           1 :         ip_fib_init();
    3627             : 
    3628           1 :         if (ip_rt_proc_init())
    3629           0 :                 pr_err("Unable to create route proc files\n");
    3630             : #ifdef CONFIG_XFRM
    3631             :         xfrm_init();
    3632             :         xfrm4_init();
    3633             : #endif
    3634           1 :         rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL,
    3635             :                       RTNL_FLAG_DOIT_UNLOCKED);
    3636             : 
    3637             : #ifdef CONFIG_SYSCTL
    3638           1 :         register_pernet_subsys(&sysctl_route_ops);
    3639             : #endif
    3640           1 :         register_pernet_subsys(&rt_genid_ops);
    3641           1 :         register_pernet_subsys(&ipv4_inetpeer_ops);
    3642           1 :         return 0;
    3643             : }
    3644             : 
    3645             : #ifdef CONFIG_SYSCTL
    3646             : /*
    3647             :  * We really need to sanitize the damn ipv4 init order; then all
    3648             :  * this nonsense will go away.
    3649             :  */
    3650           1 : void __init ip_static_sysctl_init(void)
    3651             : {
    3652           1 :         register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
    3653           1 : }
    3654             : #endif

Generated by: LCOV version 1.14