LCOV - code coverage report
Current view: top level - net/ipv4 - fib_trie.c (source / functions) Hit Total Coverage
Test: landlock.info Lines: 521 1363 38.2 %
Date: 2021-04-22 12:43:58 Functions: 32 73 43.8 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  *
       4             :  *   Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
       5             :  *     & Swedish University of Agricultural Sciences.
       6             :  *
       7             :  *   Jens Laas <jens.laas@data.slu.se> Swedish University of
       8             :  *     Agricultural Sciences.
       9             :  *
      10             :  *   Hans Liss <hans.liss@its.uu.se>  Uppsala Universitet
      11             :  *
      12             :  * This work is based on the LPC-trie which is originally described in:
      13             :  *
      14             :  * An experimental study of compression methods for dynamic tries
      15             :  * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
      16             :  * https://www.csc.kth.se/~snilsson/software/dyntrie2/
      17             :  *
      18             :  * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
      19             :  * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
      20             :  *
      21             :  * Code from fib_hash has been reused which includes the following header:
      22             :  *
      23             :  * INET         An implementation of the TCP/IP protocol suite for the LINUX
      24             :  *              operating system.  INET is implemented using the  BSD Socket
      25             :  *              interface as the means of communication with the user level.
      26             :  *
      27             :  *              IPv4 FIB: lookup engine and maintenance routines.
      28             :  *
      29             :  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
      30             :  *
      31             :  * Substantial contributions to this work comes from:
      32             :  *
      33             :  *              David S. Miller, <davem@davemloft.net>
      34             :  *              Stephen Hemminger <shemminger@osdl.org>
      35             :  *              Paul E. McKenney <paulmck@us.ibm.com>
      36             :  *              Patrick McHardy <kaber@trash.net>
      37             :  */
      38             : #include <linux/cache.h>
      39             : #include <linux/uaccess.h>
      40             : #include <linux/bitops.h>
      41             : #include <linux/types.h>
      42             : #include <linux/kernel.h>
      43             : #include <linux/mm.h>
      44             : #include <linux/string.h>
      45             : #include <linux/socket.h>
      46             : #include <linux/sockios.h>
      47             : #include <linux/errno.h>
      48             : #include <linux/in.h>
      49             : #include <linux/inet.h>
      50             : #include <linux/inetdevice.h>
      51             : #include <linux/netdevice.h>
      52             : #include <linux/if_arp.h>
      53             : #include <linux/proc_fs.h>
      54             : #include <linux/rcupdate.h>
      55             : #include <linux/skbuff.h>
      56             : #include <linux/netlink.h>
      57             : #include <linux/init.h>
      58             : #include <linux/list.h>
      59             : #include <linux/slab.h>
      60             : #include <linux/export.h>
      61             : #include <linux/vmalloc.h>
      62             : #include <linux/notifier.h>
      63             : #include <net/net_namespace.h>
      64             : #include <net/ip.h>
      65             : #include <net/protocol.h>
      66             : #include <net/route.h>
      67             : #include <net/tcp.h>
      68             : #include <net/sock.h>
      69             : #include <net/ip_fib.h>
      70             : #include <net/fib_notifier.h>
      71             : #include <trace/events/fib.h>
      72             : #include "fib_lookup.h"
      73             : 
      74           0 : static int call_fib_entry_notifier(struct notifier_block *nb,
      75             :                                    enum fib_event_type event_type, u32 dst,
      76             :                                    int dst_len, struct fib_alias *fa,
      77             :                                    struct netlink_ext_ack *extack)
      78             : {
      79           0 :         struct fib_entry_notifier_info info = {
      80             :                 .info.extack = extack,
      81             :                 .dst = dst,
      82             :                 .dst_len = dst_len,
      83           0 :                 .fi = fa->fa_info,
      84           0 :                 .tos = fa->fa_tos,
      85           0 :                 .type = fa->fa_type,
      86           0 :                 .tb_id = fa->tb_id,
      87             :         };
      88           0 :         return call_fib4_notifier(nb, event_type, &info.info);
      89             : }
      90             : 
      91          10 : static int call_fib_entry_notifiers(struct net *net,
      92             :                                     enum fib_event_type event_type, u32 dst,
      93             :                                     int dst_len, struct fib_alias *fa,
      94             :                                     struct netlink_ext_ack *extack)
      95             : {
      96          10 :         struct fib_entry_notifier_info info = {
      97             :                 .info.extack = extack,
      98             :                 .dst = dst,
      99             :                 .dst_len = dst_len,
     100          10 :                 .fi = fa->fa_info,
     101          10 :                 .tos = fa->fa_tos,
     102          10 :                 .type = fa->fa_type,
     103          10 :                 .tb_id = fa->tb_id,
     104             :         };
     105          10 :         return call_fib4_notifiers(net, event_type, &info.info);
     106             : }
     107             : 
     108             : #define MAX_STAT_DEPTH 32
     109             : 
     110             : #define KEYLENGTH       (8*sizeof(t_key))
     111             : #define KEY_MAX         ((t_key)~0)
     112             : 
     113             : typedef unsigned int t_key;
     114             : 
     115             : #define IS_TRIE(n)      ((n)->pos >= KEYLENGTH)
     116             : #define IS_TNODE(n)     ((n)->bits)
     117             : #define IS_LEAF(n)      (!(n)->bits)
     118             : 
     119             : struct key_vector {
     120             :         t_key key;
     121             :         unsigned char pos;              /* 2log(KEYLENGTH) bits needed */
     122             :         unsigned char bits;             /* 2log(KEYLENGTH) bits needed */
     123             :         unsigned char slen;
     124             :         union {
     125             :                 /* This list pointer if valid if (pos | bits) == 0 (LEAF) */
     126             :                 struct hlist_head leaf;
     127             :                 /* This array is valid if (pos | bits) > 0 (TNODE) */
     128             :                 struct key_vector __rcu *tnode[0];
     129             :         };
     130             : };
     131             : 
     132             : struct tnode {
     133             :         struct rcu_head rcu;
     134             :         t_key empty_children;           /* KEYLENGTH bits needed */
     135             :         t_key full_children;            /* KEYLENGTH bits needed */
     136             :         struct key_vector __rcu *parent;
     137             :         struct key_vector kv[1];
     138             : #define tn_bits kv[0].bits
     139             : };
     140             : 
     141             : #define TNODE_SIZE(n)   offsetof(struct tnode, kv[0].tnode[n])
     142             : #define LEAF_SIZE       TNODE_SIZE(1)
     143             : 
     144             : #ifdef CONFIG_IP_FIB_TRIE_STATS
     145             : struct trie_use_stats {
     146             :         unsigned int gets;
     147             :         unsigned int backtrack;
     148             :         unsigned int semantic_match_passed;
     149             :         unsigned int semantic_match_miss;
     150             :         unsigned int null_node_hit;
     151             :         unsigned int resize_node_skipped;
     152             : };
     153             : #endif
     154             : 
     155             : struct trie_stat {
     156             :         unsigned int totdepth;
     157             :         unsigned int maxdepth;
     158             :         unsigned int tnodes;
     159             :         unsigned int leaves;
     160             :         unsigned int nullpointers;
     161             :         unsigned int prefixes;
     162             :         unsigned int nodesizes[MAX_STAT_DEPTH];
     163             : };
     164             : 
     165             : struct trie {
     166             :         struct key_vector kv[1];
     167             : #ifdef CONFIG_IP_FIB_TRIE_STATS
     168             :         struct trie_use_stats __percpu *stats;
     169             : #endif
     170             : };
     171             : 
     172             : static struct key_vector *resize(struct trie *t, struct key_vector *tn);
     173             : static unsigned int tnode_free_size;
     174             : 
     175             : /*
     176             :  * synchronize_rcu after call_rcu for outstanding dirty memory; it should be
     177             :  * especially useful before resizing the root node with PREEMPT_NONE configs;
     178             :  * the value was obtained experimentally, aiming to avoid visible slowdown.
     179             :  */
     180             : unsigned int sysctl_fib_sync_mem = 512 * 1024;
     181             : unsigned int sysctl_fib_sync_mem_min = 64 * 1024;
     182             : unsigned int sysctl_fib_sync_mem_max = 64 * 1024 * 1024;
     183             : 
     184             : static struct kmem_cache *fn_alias_kmem __ro_after_init;
     185             : static struct kmem_cache *trie_leaf_kmem __ro_after_init;
     186             : 
     187         246 : static inline struct tnode *tn_info(struct key_vector *kv)
     188             : {
     189         246 :         return container_of(kv, struct tnode, kv[0]);
     190             : }
     191             : 
     192             : /* caller must hold RTNL */
     193             : #define node_parent(tn) rtnl_dereference(tn_info(tn)->parent)
     194             : #define get_child(tn, i) rtnl_dereference((tn)->tnode[i])
     195             : 
     196             : /* caller must hold RCU read lock or RTNL */
     197             : #define node_parent_rcu(tn) rcu_dereference_rtnl(tn_info(tn)->parent)
     198             : #define get_child_rcu(tn, i) rcu_dereference_rtnl((tn)->tnode[i])
     199             : 
     200             : /* wrapper for rcu_assign_pointer */
     201          25 : static inline void node_set_parent(struct key_vector *n, struct key_vector *tp)
     202             : {
     203          25 :         if (n)
     204           2 :                 rcu_assign_pointer(tn_info(n)->parent, tp);
     205          16 : }
     206             : 
     207             : #define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER(tn_info(n)->parent, p)
     208             : 
     209             : /* This provides us with the number of children in this node, in the case of a
     210             :  * leaf this will return 0 meaning none of the children are accessible.
     211             :  */
     212         102 : static inline unsigned long child_length(const struct key_vector *tn)
     213             : {
     214         102 :         return (1ul << tn->bits) & ~(1ul);
     215             : }
     216             : 
     217             : #define get_cindex(key, kv) (((key) ^ (kv)->key) >> (kv)->pos)
     218             : 
     219          83 : static inline unsigned long get_index(t_key key, struct key_vector *kv)
     220             : {
     221          83 :         unsigned long index = key ^ kv->key;
     222             : 
     223          83 :         if ((BITS_PER_LONG <= KEYLENGTH) && (KEYLENGTH == kv->pos))
     224             :                 return 0;
     225             : 
     226          83 :         return index >> kv->pos;
     227             : }
     228             : 
     229             : /* To understand this stuff, an understanding of keys and all their bits is
     230             :  * necessary. Every node in the trie has a key associated with it, but not
     231             :  * all of the bits in that key are significant.
     232             :  *
     233             :  * Consider a node 'n' and its parent 'tp'.
     234             :  *
     235             :  * If n is a leaf, every bit in its key is significant. Its presence is
     236             :  * necessitated by path compression, since during a tree traversal (when
     237             :  * searching for a leaf - unless we are doing an insertion) we will completely
     238             :  * ignore all skipped bits we encounter. Thus we need to verify, at the end of
     239             :  * a potentially successful search, that we have indeed been walking the
     240             :  * correct key path.
     241             :  *
     242             :  * Note that we can never "miss" the correct key in the tree if present by
     243             :  * following the wrong path. Path compression ensures that segments of the key
     244             :  * that are the same for all keys with a given prefix are skipped, but the
     245             :  * skipped part *is* identical for each node in the subtrie below the skipped
     246             :  * bit! trie_insert() in this implementation takes care of that.
     247             :  *
     248             :  * if n is an internal node - a 'tnode' here, the various parts of its key
     249             :  * have many different meanings.
     250             :  *
     251             :  * Example:
     252             :  * _________________________________________________________________
     253             :  * | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
     254             :  * -----------------------------------------------------------------
     255             :  *  31  30  29  28  27  26  25  24  23  22  21  20  19  18  17  16
     256             :  *
     257             :  * _________________________________________________________________
     258             :  * | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
     259             :  * -----------------------------------------------------------------
     260             :  *  15  14  13  12  11  10   9   8   7   6   5   4   3   2   1   0
     261             :  *
     262             :  * tp->pos = 22
     263             :  * tp->bits = 3
     264             :  * n->pos = 13
     265             :  * n->bits = 4
     266             :  *
     267             :  * First, let's just ignore the bits that come before the parent tp, that is
     268             :  * the bits from (tp->pos + tp->bits) to 31. They are *known* but at this
     269             :  * point we do not use them for anything.
     270             :  *
     271             :  * The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
     272             :  * index into the parent's child array. That is, they will be used to find
     273             :  * 'n' among tp's children.
     274             :  *
     275             :  * The bits from (n->pos + n->bits) to (tp->pos - 1) - "S" - are skipped bits
     276             :  * for the node n.
     277             :  *
     278             :  * All the bits we have seen so far are significant to the node n. The rest
     279             :  * of the bits are really not needed or indeed known in n->key.
     280             :  *
     281             :  * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
     282             :  * n's child array, and will of course be different for each child.
     283             :  *
     284             :  * The rest of the bits, from 0 to (n->pos -1) - "u" - are completely unknown
     285             :  * at this point.
     286             :  */
     287             : 
     288             : static const int halve_threshold = 25;
     289             : static const int inflate_threshold = 50;
     290             : static const int halve_threshold_root = 15;
     291             : static const int inflate_threshold_root = 30;
     292             : 
     293           0 : static void __alias_free_mem(struct rcu_head *head)
     294             : {
     295           0 :         struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
     296           0 :         kmem_cache_free(fn_alias_kmem, fa);
     297           0 : }
     298             : 
     299           0 : static inline void alias_free_mem_rcu(struct fib_alias *fa)
     300             : {
     301           0 :         call_rcu(&fa->rcu, __alias_free_mem);
     302             : }
     303             : 
     304             : #define TNODE_VMALLOC_MAX \
     305             :         ilog2((SIZE_MAX - TNODE_SIZE(0)) / sizeof(struct key_vector *))
     306             : 
     307          10 : static void __node_free_rcu(struct rcu_head *head)
     308             : {
     309          10 :         struct tnode *n = container_of(head, struct tnode, rcu);
     310             : 
     311          10 :         if (!n->tn_bits)
     312           0 :                 kmem_cache_free(trie_leaf_kmem, n);
     313             :         else
     314          10 :                 kvfree(n);
     315          10 : }
     316             : 
     317             : #define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu)
     318             : 
     319          16 : static struct tnode *tnode_alloc(int bits)
     320             : {
     321          16 :         size_t size;
     322             : 
     323             :         /* verify bits is within bounds */
     324          16 :         if (bits > TNODE_VMALLOC_MAX)
     325             :                 return NULL;
     326             : 
     327             :         /* determine size and verify it is non-zero and didn't overflow */
     328          16 :         size = TNODE_SIZE(1ul << bits);
     329             : 
     330          16 :         if (size <= PAGE_SIZE)
     331          16 :                 return kzalloc(size, GFP_KERNEL);
     332             :         else
     333           0 :                 return vzalloc(size);
     334             : }
     335             : 
     336           0 : static inline void empty_child_inc(struct key_vector *n)
     337             : {
     338           0 :         tn_info(n)->empty_children++;
     339             : 
     340           0 :         if (!tn_info(n)->empty_children)
     341           0 :                 tn_info(n)->full_children++;
     342             : }
     343             : 
     344          32 : static inline void empty_child_dec(struct key_vector *n)
     345             : {
     346          32 :         if (!tn_info(n)->empty_children)
     347           0 :                 tn_info(n)->full_children--;
     348             : 
     349          32 :         tn_info(n)->empty_children--;
     350          32 : }
     351             : 
     352           8 : static struct key_vector *leaf_new(t_key key, struct fib_alias *fa)
     353             : {
     354           8 :         struct key_vector *l;
     355           8 :         struct tnode *kv;
     356             : 
     357           8 :         kv = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
     358           8 :         if (!kv)
     359             :                 return NULL;
     360             : 
     361             :         /* initialize key vector */
     362           8 :         l = kv->kv;
     363           8 :         l->key = key;
     364           8 :         l->pos = 0;
     365           8 :         l->bits = 0;
     366           8 :         l->slen = fa->fa_slen;
     367             : 
     368             :         /* link leaf to fib alias */
     369           8 :         INIT_HLIST_HEAD(&l->leaf);
     370           8 :         hlist_add_head(&fa->fa_list, &l->leaf);
     371             : 
     372           8 :         return l;
     373             : }
     374             : 
     375          16 : static struct key_vector *tnode_new(t_key key, int pos, int bits)
     376             : {
     377          16 :         unsigned int shift = pos + bits;
     378          16 :         struct key_vector *tn;
     379          16 :         struct tnode *tnode;
     380             : 
     381             :         /* verify bits and pos their msb bits clear and values are valid */
     382          16 :         BUG_ON(!bits || (shift > KEYLENGTH));
     383             : 
     384          16 :         tnode = tnode_alloc(bits);
     385          16 :         if (!tnode)
     386             :                 return NULL;
     387             : 
     388          16 :         pr_debug("AT %p s=%zu %zu\n", tnode, TNODE_SIZE(0),
     389             :                  sizeof(struct key_vector *) << bits);
     390             : 
     391          16 :         if (bits == KEYLENGTH)
     392           0 :                 tnode->full_children = 1;
     393             :         else
     394          16 :                 tnode->empty_children = 1ul << bits;
     395             : 
     396          16 :         tn = tnode->kv;
     397          16 :         tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0;
     398          16 :         tn->pos = pos;
     399          16 :         tn->bits = bits;
     400          16 :         tn->slen = pos;
     401             : 
     402          16 :         return tn;
     403             : }
     404             : 
     405             : /* Check whether a tnode 'n' is "full", i.e. it is an internal node
     406             :  * and no bits are skipped. See discussion in dyntree paper p. 6
     407             :  */
     408         131 : static inline int tnode_full(struct key_vector *tn, struct key_vector *n)
     409             : {
     410          65 :         return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n);
     411             : }
     412             : 
     413             : /* Add a child at position i overwriting the old value.
     414             :  * Update the value of full_children and empty_children.
     415             :  */
     416          42 : static void put_child(struct key_vector *tn, unsigned long i,
     417             :                       struct key_vector *n)
     418             : {
     419          42 :         struct key_vector *chi = get_child(tn, i);
     420          42 :         int isfull, wasfull;
     421             : 
     422          42 :         BUG_ON(i >= child_length(tn));
     423             : 
     424             :         /* update emptyChildren, overflow into fullChildren */
     425          42 :         if (!n && chi)
     426           0 :                 empty_child_inc(tn);
     427          42 :         if (n && !chi)
     428          32 :                 empty_child_dec(tn);
     429             : 
     430             :         /* update fullChildren */
     431          42 :         wasfull = tnode_full(tn, chi);
     432          42 :         isfull = tnode_full(tn, n);
     433             : 
     434          42 :         if (wasfull && !isfull)
     435           2 :                 tn_info(tn)->full_children--;
     436          40 :         else if (!wasfull && isfull)
     437           3 :                 tn_info(tn)->full_children++;
     438             : 
     439          42 :         if (n && (tn->slen < n->slen))
     440           8 :                 tn->slen = n->slen;
     441             : 
     442          42 :         rcu_assign_pointer(tn->tnode[i], n);
     443          42 : }
     444             : 
     445           9 : static void update_children(struct key_vector *tn)
     446             : {
     447           9 :         unsigned long i;
     448             : 
     449             :         /* update all of the child parent pointers */
     450           9 :         for (i = child_length(tn); i;) {
     451          36 :                 struct key_vector *inode = get_child(tn, --i);
     452             : 
     453          36 :                 if (!inode)
     454          18 :                         continue;
     455             : 
     456             :                 /* Either update the children of a tnode that
     457             :                  * already belongs to us or update the child
     458             :                  * to point to ourselves.
     459             :                  */
     460          18 :                 if (node_parent(inode) == tn)
     461           2 :                         update_children(inode);
     462             :                 else
     463          61 :                         node_set_parent(inode, tn);
     464             :         }
     465           9 : }
     466             : 
     467          24 : static inline void put_child_root(struct key_vector *tp, t_key key,
     468             :                                   struct key_vector *n)
     469             : {
     470          24 :         if (IS_TRIE(tp))
     471           9 :                 rcu_assign_pointer(tp->tnode[0], n);
     472             :         else
     473          15 :                 put_child(tp, get_index(key, tp), n);
     474          24 : }
     475             : 
     476           7 : static inline void tnode_free_init(struct key_vector *tn)
     477             : {
     478           7 :         tn_info(tn)->rcu.next = NULL;
     479             : }
     480             : 
     481           3 : static inline void tnode_free_append(struct key_vector *tn,
     482             :                                      struct key_vector *n)
     483             : {
     484           3 :         tn_info(n)->rcu.next = tn_info(tn)->rcu.next;
     485           3 :         tn_info(tn)->rcu.next = &tn_info(n)->rcu;
     486             : }
     487             : 
     488           7 : static void tnode_free(struct key_vector *tn)
     489             : {
     490           7 :         struct callback_head *head = &tn_info(tn)->rcu;
     491             : 
     492          15 :         while (head) {
     493           8 :                 head = head->next;
     494           8 :                 tnode_free_size += TNODE_SIZE(1ul << tn->bits);
     495           8 :                 node_free(tn);
     496             : 
     497           8 :                 tn = container_of(head, struct tnode, rcu)->kv;
     498             :         }
     499             : 
     500           7 :         if (tnode_free_size >= sysctl_fib_sync_mem) {
     501           0 :                 tnode_free_size = 0;
     502           0 :                 synchronize_rcu();
     503             :         }
     504           7 : }
     505             : 
     506           7 : static struct key_vector *replace(struct trie *t,
     507             :                                   struct key_vector *oldtnode,
     508             :                                   struct key_vector *tn)
     509             : {
     510           7 :         struct key_vector *tp = node_parent(oldtnode);
     511           7 :         unsigned long i;
     512             : 
     513             :         /* setup the parent pointer out of and back into this node */
     514           7 :         NODE_INIT_PARENT(tn, tp);
     515           7 :         put_child_root(tp, tn->key, tn);
     516             : 
     517             :         /* update all of the child parent pointers */
     518           7 :         update_children(tn);
     519             : 
     520             :         /* all pointers should be clean so we are done */
     521           7 :         tnode_free(oldtnode);
     522             : 
     523             :         /* resize children now that oldtnode is freed */
     524           7 :         for (i = child_length(tn); i;) {
     525          32 :                 struct key_vector *inode = get_child(tn, --i);
     526             : 
     527             :                 /* resize child node */
     528          71 :                 if (tnode_full(tn, inode))
     529           2 :                         tn = resize(t, inode);
     530             :         }
     531             : 
     532           7 :         return tp;
     533             : }
     534             : 
     535           7 : static struct key_vector *inflate(struct trie *t,
     536             :                                   struct key_vector *oldtnode)
     537             : {
     538           7 :         struct key_vector *tn;
     539           7 :         unsigned long i;
     540           7 :         t_key m;
     541             : 
     542           7 :         pr_debug("In inflate\n");
     543             : 
     544           7 :         tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1);
     545           7 :         if (!tn)
     546           0 :                 goto notnode;
     547             : 
     548             :         /* prepare oldtnode to be freed */
     549           7 :         tnode_free_init(oldtnode);
     550             : 
     551             :         /* Assemble all of the pointers in our cluster, in this case that
     552             :          * represents all of the pointers out of our allocated nodes that
     553             :          * point to existing tnodes and the links between our allocated
     554             :          * nodes.
     555             :          */
     556          23 :         for (i = child_length(oldtnode), m = 1u << tn->pos; i;) {
     557          16 :                 struct key_vector *inode = get_child(oldtnode, --i);
     558          16 :                 struct key_vector *node0, *node1;
     559          16 :                 unsigned long j, k;
     560             : 
     561             :                 /* An empty child */
     562          16 :                 if (!inode)
     563           1 :                         continue;
     564             : 
     565             :                 /* A leaf or an internal node with skipped bits */
     566          15 :                 if (!tnode_full(oldtnode, inode)) {
     567          14 :                         put_child(tn, get_index(inode->key, tn), inode);
     568          14 :                         continue;
     569             :                 }
     570             : 
     571             :                 /* drop the node in the old tnode free list */
     572           1 :                 tnode_free_append(oldtnode, inode);
     573             : 
     574             :                 /* An internal node with two children */
     575           1 :                 if (inode->bits == 1) {
     576           0 :                         put_child(tn, 2 * i + 1, get_child(inode, 1));
     577           0 :                         put_child(tn, 2 * i, get_child(inode, 0));
     578           0 :                         continue;
     579             :                 }
     580             : 
     581             :                 /* We will replace this node 'inode' with two new
     582             :                  * ones, 'node0' and 'node1', each with half of the
     583             :                  * original children. The two new nodes will have
     584             :                  * a position one bit further down the key and this
     585             :                  * means that the "significant" part of their keys
     586             :                  * (see the discussion near the top of this file)
     587             :                  * will differ by one bit, which will be "0" in
     588             :                  * node0's key and "1" in node1's key. Since we are
     589             :                  * moving the key position by one step, the bit that
     590             :                  * we are moving away from - the bit at position
     591             :                  * (tn->pos) - is the one that will differ between
     592             :                  * node0 and node1. So... we synthesize that bit in the
     593             :                  * two new keys.
     594             :                  */
     595           1 :                 node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1);
     596           1 :                 if (!node1)
     597           0 :                         goto nomem;
     598           1 :                 node0 = tnode_new(inode->key, inode->pos, inode->bits - 1);
     599             : 
     600           1 :                 tnode_free_append(tn, node1);
     601           1 :                 if (!node0)
     602           0 :                         goto nomem;
     603           1 :                 tnode_free_append(tn, node0);
     604             : 
     605             :                 /* populate child pointers in new nodes */
     606           2 :                 for (k = child_length(inode), j = k / 2; j;) {
     607           1 :                         put_child(node1, --j, get_child(inode, --k));
     608           1 :                         put_child(node0, j, get_child(inode, j));
     609           1 :                         put_child(node1, --j, get_child(inode, --k));
     610           1 :                         put_child(node0, j, get_child(inode, j));
     611             :                 }
     612             : 
     613             :                 /* link new nodes to parent */
     614           1 :                 NODE_INIT_PARENT(node1, tn);
     615           1 :                 NODE_INIT_PARENT(node0, tn);
     616             : 
     617             :                 /* link parent to nodes */
     618           1 :                 put_child(tn, 2 * i + 1, node1);
     619           1 :                 put_child(tn, 2 * i, node0);
     620             :         }
     621             : 
     622             :         /* setup the parent pointers into and out of this node */
     623           7 :         return replace(t, oldtnode, tn);
     624           0 : nomem:
     625             :         /* all pointers should be clean so we are done */
     626           0 :         tnode_free(tn);
     627             : notnode:
     628             :         return NULL;
     629             : }
     630             : 
     631           0 : static struct key_vector *halve(struct trie *t,
     632             :                                 struct key_vector *oldtnode)
     633             : {
     634           0 :         struct key_vector *tn;
     635           0 :         unsigned long i;
     636             : 
     637           0 :         pr_debug("In halve\n");
     638             : 
     639           0 :         tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1);
     640           0 :         if (!tn)
     641           0 :                 goto notnode;
     642             : 
     643             :         /* prepare oldtnode to be freed */
     644           0 :         tnode_free_init(oldtnode);
     645             : 
     646             :         /* Assemble all of the pointers in our cluster, in this case that
     647             :          * represents all of the pointers out of our allocated nodes that
     648             :          * point to existing tnodes and the links between our allocated
     649             :          * nodes.
     650             :          */
     651           0 :         for (i = child_length(oldtnode); i;) {
     652           0 :                 struct key_vector *node1 = get_child(oldtnode, --i);
     653           0 :                 struct key_vector *node0 = get_child(oldtnode, --i);
     654           0 :                 struct key_vector *inode;
     655             : 
     656             :                 /* At least one of the children is empty */
     657           0 :                 if (!node1 || !node0) {
     658           0 :                         put_child(tn, i / 2, node1 ? : node0);
     659           0 :                         continue;
     660             :                 }
     661             : 
     662             :                 /* Two nonempty children */
     663           0 :                 inode = tnode_new(node0->key, oldtnode->pos, 1);
     664           0 :                 if (!inode)
     665           0 :                         goto nomem;
     666           0 :                 tnode_free_append(tn, inode);
     667             : 
     668             :                 /* initialize pointers out of node */
     669           0 :                 put_child(inode, 1, node1);
     670           0 :                 put_child(inode, 0, node0);
     671           0 :                 NODE_INIT_PARENT(inode, tn);
     672             : 
     673             :                 /* link parent to node */
     674           0 :                 put_child(tn, i / 2, inode);
     675             :         }
     676             : 
     677             :         /* setup the parent pointers into and out of this node */
     678           0 :         return replace(t, oldtnode, tn);
     679           0 : nomem:
     680             :         /* all pointers should be clean so we are done */
     681           0 :         tnode_free(tn);
     682             : notnode:
     683             :         return NULL;
     684             : }
     685             : 
     686           2 : static struct key_vector *collapse(struct trie *t,
     687             :                                    struct key_vector *oldtnode)
     688             : {
     689           2 :         struct key_vector *n, *tp;
     690           2 :         unsigned long i;
     691             : 
     692             :         /* scan the tnode looking for that one child that might still exist */
     693           5 :         for (n = NULL, i = child_length(oldtnode); !n && i;)
     694           3 :                 n = get_child(oldtnode, --i);
     695             : 
     696             :         /* compress one level */
     697           2 :         tp = node_parent(oldtnode);
     698           2 :         put_child_root(tp, oldtnode->key, n);
     699           2 :         node_set_parent(n, tp);
     700             : 
     701             :         /* drop dead node */
     702           2 :         node_free(oldtnode);
     703             : 
     704           2 :         return tp;
     705             : }
     706             : 
     707           0 : static unsigned char update_suffix(struct key_vector *tn)
     708             : {
     709           0 :         unsigned char slen = tn->pos;
     710           0 :         unsigned long stride, i;
     711           0 :         unsigned char slen_max;
     712             : 
     713             :         /* only vector 0 can have a suffix length greater than or equal to
     714             :          * tn->pos + tn->bits, the second highest node will have a suffix
     715             :          * length at most of tn->pos + tn->bits - 1
     716             :          */
     717           0 :         slen_max = min_t(unsigned char, tn->pos + tn->bits - 1, tn->slen);
     718             : 
     719             :         /* search though the list of children looking for nodes that might
     720             :          * have a suffix greater than the one we currently have.  This is
     721             :          * why we start with a stride of 2 since a stride of 1 would
     722             :          * represent the nodes with suffix length equal to tn->pos
     723             :          */
     724           0 :         for (i = 0, stride = 0x2ul ; i < child_length(tn); i += stride) {
     725           0 :                 struct key_vector *n = get_child(tn, i);
     726             : 
     727           0 :                 if (!n || (n->slen <= slen))
     728           0 :                         continue;
     729             : 
     730             :                 /* update stride and slen based on new value */
     731           0 :                 stride <<= (n->slen - slen);
     732           0 :                 slen = n->slen;
     733           0 :                 i &= ~(stride - 1);
     734             : 
     735             :                 /* stop searching if we have hit the maximum possible value */
     736           0 :                 if (slen >= slen_max)
     737             :                         break;
     738             :         }
     739             : 
     740           0 :         tn->slen = slen;
     741             : 
     742           0 :         return slen;
     743             : }
     744             : 
     745             : /* From "Implementing a dynamic compressed trie" by Stefan Nilsson of
     746             :  * the Helsinki University of Technology and Matti Tikkanen of Nokia
     747             :  * Telecommunications, page 6:
     748             :  * "A node is doubled if the ratio of non-empty children to all
     749             :  * children in the *doubled* node is at least 'high'."
     750             :  *
     751             :  * 'high' in this instance is the variable 'inflate_threshold'. It
     752             :  * is expressed as a percentage, so we multiply it with
     753             :  * child_length() and instead of multiplying by 2 (since the
     754             :  * child array will be doubled by inflate()) and multiplying
     755             :  * the left-hand side by 100 (to handle the percentage thing) we
     756             :  * multiply the left-hand side by 50.
     757             :  *
     758             :  * The left-hand side may look a bit weird: child_length(tn)
     759             :  * - tn->empty_children is of course the number of non-null children
     760             :  * in the current node. tn->full_children is the number of "full"
     761             :  * children, that is non-null tnodes with a skip value of 0.
     762             :  * All of those will be doubled in the resulting inflated tnode, so
     763             :  * we just count them one extra time here.
     764             :  *
     765             :  * A clearer way to write this would be:
     766             :  *
     767             :  * to_be_doubled = tn->full_children;
     768             :  * not_to_be_doubled = child_length(tn) - tn->empty_children -
     769             :  *     tn->full_children;
     770             :  *
     771             :  * new_child_length = child_length(tn) * 2;
     772             :  *
     773             :  * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
     774             :  *      new_child_length;
     775             :  * if (new_fill_factor >= inflate_threshold)
     776             :  *
     777             :  * ...and so on, tho it would mess up the while () loop.
     778             :  *
     779             :  * anyway,
     780             :  * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
     781             :  *      inflate_threshold
     782             :  *
     783             :  * avoid a division:
     784             :  * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
     785             :  *      inflate_threshold * new_child_length
     786             :  *
     787             :  * expand not_to_be_doubled and to_be_doubled, and shorten:
     788             :  * 100 * (child_length(tn) - tn->empty_children +
     789             :  *    tn->full_children) >= inflate_threshold * new_child_length
     790             :  *
     791             :  * expand new_child_length:
     792             :  * 100 * (child_length(tn) - tn->empty_children +
     793             :  *    tn->full_children) >=
     794             :  *      inflate_threshold * child_length(tn) * 2
     795             :  *
     796             :  * shorten again:
     797             :  * 50 * (tn->full_children + child_length(tn) -
     798             :  *    tn->empty_children) >= inflate_threshold *
     799             :  *    child_length(tn)
     800             :  *
     801             :  */
     802          20 : static inline bool should_inflate(struct key_vector *tp, struct key_vector *tn)
     803             : {
     804          20 :         unsigned long used = child_length(tn);
     805          20 :         unsigned long threshold = used;
     806             : 
     807             :         /* Keep root node larger */
     808          20 :         threshold *= IS_TRIE(tp) ? inflate_threshold_root : inflate_threshold;
     809          20 :         used -= tn_info(tn)->empty_children;
     810          20 :         used += tn_info(tn)->full_children;
     811             : 
     812             :         /* if bits == KEYLENGTH then pos = 0, and will fail below */
     813             : 
     814          20 :         return (used > 1) && tn->pos && ((50 * used) >= threshold);
     815             : }
     816             : 
     817           7 : static inline bool should_halve(struct key_vector *tp, struct key_vector *tn)
     818             : {
     819           7 :         unsigned long used = child_length(tn);
     820           7 :         unsigned long threshold = used;
     821             : 
     822             :         /* Keep root node larger */
     823           7 :         threshold *= IS_TRIE(tp) ? halve_threshold_root : halve_threshold;
     824           7 :         used -= tn_info(tn)->empty_children;
     825             : 
     826             :         /* if bits == KEYLENGTH then used = 100% on wrap, and will fail below */
     827             : 
     828           7 :         return (used > 1) && (tn->bits > 1) && ((100 * used) < threshold);
     829             : }
     830             : 
     831           7 : static inline bool should_collapse(struct key_vector *tn)
     832             : {
     833           7 :         unsigned long used = child_length(tn);
     834             : 
     835           7 :         used -= tn_info(tn)->empty_children;
     836             : 
     837             :         /* account for bits == KEYLENGTH case */
     838           7 :         if ((tn->bits == KEYLENGTH) && tn_info(tn)->full_children)
     839           0 :                 used -= KEY_MAX;
     840             : 
     841             :         /* One child or none, time to drop us from the trie */
     842           7 :         return used < 2;
     843             : }
     844             : 
     845             : #define MAX_WORK 10
     846          13 : static struct key_vector *resize(struct trie *t, struct key_vector *tn)
     847             : {
     848             : #ifdef CONFIG_IP_FIB_TRIE_STATS
     849             :         struct trie_use_stats __percpu *stats = t->stats;
     850             : #endif
     851          13 :         struct key_vector *tp = node_parent(tn);
     852          13 :         unsigned long cindex = get_index(tn->key, tp);
     853          13 :         int max_work = MAX_WORK;
     854             : 
     855          13 :         pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
     856             :                  tn, inflate_threshold, halve_threshold);
     857             : 
     858             :         /* track the tnode via the pointer from the parent instead of
     859             :          * doing it ourselves.  This way we can let RCU fully do its
     860             :          * thing without us interfering
     861             :          */
     862          13 :         BUG_ON(tn != get_child(tp, cindex));
     863             : 
     864             :         /* Double as long as the resulting node has a number of
     865             :          * nonempty nodes that are above the threshold.
     866             :          */
     867          20 :         while (should_inflate(tp, tn) && max_work) {
     868           7 :                 tp = inflate(t, tn);
     869           7 :                 if (!tp) {
     870             : #ifdef CONFIG_IP_FIB_TRIE_STATS
     871             :                         this_cpu_inc(stats->resize_node_skipped);
     872             : #endif
     873             :                         break;
     874             :                 }
     875             : 
     876           7 :                 max_work--;
     877           7 :                 tn = get_child(tp, cindex);
     878             :         }
     879             : 
     880             :         /* update parent in case inflate failed */
     881          13 :         tp = node_parent(tn);
     882             : 
     883             :         /* Return if at least one inflate is run */
     884          13 :         if (max_work != MAX_WORK)
     885             :                 return tp;
     886             : 
     887             :         /* Halve as long as the number of empty children in this
     888             :          * node is above threshold.
     889             :          */
     890           7 :         while (should_halve(tp, tn) && max_work) {
     891           0 :                 tp = halve(t, tn);
     892           0 :                 if (!tp) {
     893             : #ifdef CONFIG_IP_FIB_TRIE_STATS
     894             :                         this_cpu_inc(stats->resize_node_skipped);
     895             : #endif
     896             :                         break;
     897             :                 }
     898             : 
     899           0 :                 max_work--;
     900           0 :                 tn = get_child(tp, cindex);
     901             :         }
     902             : 
     903             :         /* Only one child remains */
     904           7 :         if (should_collapse(tn))
     905           2 :                 return collapse(t, tn);
     906             : 
     907             :         /* update parent in case halve failed */
     908           5 :         return node_parent(tn);
     909             : }
     910             : 
     911           0 : static void node_pull_suffix(struct key_vector *tn, unsigned char slen)
     912             : {
     913           0 :         unsigned char node_slen = tn->slen;
     914             : 
     915           0 :         while ((node_slen > tn->pos) && (node_slen > slen)) {
     916           0 :                 slen = update_suffix(tn);
     917           0 :                 if (node_slen == slen)
     918             :                         break;
     919             : 
     920           0 :                 tn = node_parent(tn);
     921           0 :                 node_slen = tn->slen;
     922             :         }
     923           0 : }
     924             : 
     925           8 : static void node_push_suffix(struct key_vector *tn, unsigned char slen)
     926             : {
     927          13 :         while (tn->slen < slen) {
     928           5 :                 tn->slen = slen;
     929           5 :                 tn = node_parent(tn);
     930             :         }
     931           8 : }
     932             : 
     933             : /* rcu_read_lock needs to be hold by caller from readside */
     934          20 : static struct key_vector *fib_find_node(struct trie *t,
     935             :                                         struct key_vector **tp, u32 key)
     936             : {
     937          20 :         struct key_vector *pn, *n = t->kv;
     938          20 :         unsigned long index = 0;
     939             : 
     940          41 :         do {
     941          41 :                 pn = n;
     942          41 :                 n = get_child_rcu(n, index);
     943             : 
     944          41 :                 if (!n)
     945             :                         break;
     946             : 
     947          40 :                 index = get_cindex(key, n);
     948             : 
     949             :                 /* This bit of code is a bit tricky but it combines multiple
     950             :                  * checks into a single check.  The prefix consists of the
     951             :                  * prefix plus zeros for the bits in the cindex. The index
     952             :                  * is the difference between the key and this value.  From
     953             :                  * this we can actually derive several pieces of data.
     954             :                  *   if (index >= (1ul << bits))
     955             :                  *     we have a mismatch in skip bits and failed
     956             :                  *   else
     957             :                  *     we know the value is cindex
     958             :                  *
     959             :                  * This check is safe even if bits == KEYLENGTH due to the
     960             :                  * fact that we can only allocate a node with 32 bits if a
     961             :                  * long is greater than 32 bits.
     962             :                  */
     963          40 :                 if (index >= (1ul << n->bits)) {
     964             :                         n = NULL;
     965             :                         break;
     966             :                 }
     967             : 
     968             :                 /* keep searching until we find a perfect match leaf or NULL */
     969          33 :         } while (IS_TNODE(n));
     970             : 
     971          20 :         *tp = pn;
     972             : 
     973          20 :         return n;
     974             : }
     975             : 
     976             : /* Return the first fib alias matching TOS with
     977             :  * priority less than or equal to PRIO.
     978             :  * If 'find_first' is set, return the first matching
     979             :  * fib alias, regardless of TOS and priority.
     980             :  */
     981          14 : static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
     982             :                                         u8 tos, u32 prio, u32 tb_id,
     983             :                                         bool find_first)
     984             : {
     985          14 :         struct fib_alias *fa;
     986             : 
     987          14 :         if (!fah)
     988             :                 return NULL;
     989             : 
     990          28 :         hlist_for_each_entry(fa, fah, fa_list) {
     991          14 :                 if (fa->fa_slen < slen)
     992           0 :                         continue;
     993          14 :                 if (fa->fa_slen != slen)
     994             :                         break;
     995          12 :                 if (fa->tb_id > tb_id)
     996           0 :                         continue;
     997          12 :                 if (fa->tb_id != tb_id)
     998             :                         break;
     999          12 :                 if (find_first)
    1000          10 :                         return fa;
    1001           2 :                 if (fa->fa_tos > tos)
    1002           0 :                         continue;
    1003           2 :                 if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos)
    1004           2 :                         return fa;
    1005             :         }
    1006             : 
    1007             :         return NULL;
    1008             : }
    1009             : 
    1010             : static struct fib_alias *
    1011           0 : fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri)
    1012             : {
    1013           0 :         u8 slen = KEYLENGTH - fri->dst_len;
    1014           0 :         struct key_vector *l, *tp;
    1015           0 :         struct fib_table *tb;
    1016           0 :         struct fib_alias *fa;
    1017           0 :         struct trie *t;
    1018             : 
    1019           0 :         tb = fib_get_table(net, fri->tb_id);
    1020           0 :         if (!tb)
    1021             :                 return NULL;
    1022             : 
    1023           0 :         t = (struct trie *)tb->tb_data;
    1024           0 :         l = fib_find_node(t, &tp, be32_to_cpu(fri->dst));
    1025           0 :         if (!l)
    1026             :                 return NULL;
    1027             : 
    1028           0 :         hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
    1029           0 :                 if (fa->fa_slen == slen && fa->tb_id == fri->tb_id &&
    1030           0 :                     fa->fa_tos == fri->tos && fa->fa_info == fri->fi &&
    1031           0 :                     fa->fa_type == fri->type)
    1032           0 :                         return fa;
    1033             :         }
    1034             : 
    1035             :         return NULL;
    1036             : }
    1037             : 
    1038           0 : void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
    1039             : {
    1040           0 :         struct fib_alias *fa_match;
    1041           0 :         struct sk_buff *skb;
    1042           0 :         int err;
    1043             : 
    1044           0 :         rcu_read_lock();
    1045             : 
    1046           0 :         fa_match = fib_find_matching_alias(net, fri);
    1047           0 :         if (!fa_match)
    1048           0 :                 goto out;
    1049             : 
    1050           0 :         if (fa_match->offload == fri->offload && fa_match->trap == fri->trap &&
    1051             :             fa_match->offload_failed == fri->offload_failed)
    1052           0 :                 goto out;
    1053             : 
    1054           0 :         fa_match->offload = fri->offload;
    1055           0 :         fa_match->trap = fri->trap;
    1056             : 
    1057             :         /* 2 means send notifications only if offload_failed was changed. */
    1058           0 :         if (net->ipv4.sysctl_fib_notify_on_flag_change == 2 &&
    1059           0 :             fa_match->offload_failed == fri->offload_failed)
    1060           0 :                 goto out;
    1061             : 
    1062           0 :         fa_match->offload_failed = fri->offload_failed;
    1063             : 
    1064           0 :         if (!net->ipv4.sysctl_fib_notify_on_flag_change)
    1065           0 :                 goto out;
    1066             : 
    1067           0 :         skb = nlmsg_new(fib_nlmsg_size(fa_match->fa_info), GFP_ATOMIC);
    1068           0 :         if (!skb) {
    1069           0 :                 err = -ENOBUFS;
    1070           0 :                 goto errout;
    1071             :         }
    1072             : 
    1073           0 :         err = fib_dump_info(skb, 0, 0, RTM_NEWROUTE, fri, 0);
    1074           0 :         if (err < 0) {
    1075             :                 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
    1076           0 :                 WARN_ON(err == -EMSGSIZE);
    1077           0 :                 kfree_skb(skb);
    1078           0 :                 goto errout;
    1079             :         }
    1080             : 
    1081           0 :         rtnl_notify(skb, net, 0, RTNLGRP_IPV4_ROUTE, NULL, GFP_ATOMIC);
    1082           0 :         goto out;
    1083             : 
    1084           0 : errout:
    1085           0 :         rtnl_set_sk_err(net, RTNLGRP_IPV4_ROUTE, err);
    1086           0 : out:
    1087           0 :         rcu_read_unlock();
    1088           0 : }
    1089             : EXPORT_SYMBOL_GPL(fib_alias_hw_flags_set);
    1090             : 
    1091           8 : static void trie_rebalance(struct trie *t, struct key_vector *tn)
    1092             : {
    1093          19 :         while (!IS_TRIE(tn))
    1094          11 :                 tn = resize(t, tn);
    1095           8 : }
    1096             : 
    1097           8 : static int fib_insert_node(struct trie *t, struct key_vector *tp,
    1098             :                            struct fib_alias *new, t_key key)
    1099             : {
    1100           8 :         struct key_vector *n, *l;
    1101             : 
    1102           8 :         l = leaf_new(key, new);
    1103           8 :         if (!l)
    1104           0 :                 goto noleaf;
    1105             : 
    1106             :         /* retrieve child from parent node */
    1107           8 :         n = get_child(tp, get_index(key, tp));
    1108             : 
    1109             :         /* Case 2: n is a LEAF or a TNODE and the key doesn't match.
    1110             :          *
    1111             :          *  Add a new tnode here
    1112             :          *  first tnode need some special handling
    1113             :          *  leaves us in position for handling as case 3
    1114             :          */
    1115           8 :         if (n) {
    1116           7 :                 struct key_vector *tn;
    1117             : 
    1118           7 :                 tn = tnode_new(key, __fls(key ^ n->key), 1);
    1119           7 :                 if (!tn)
    1120           0 :                         goto notnode;
    1121             : 
    1122             :                 /* initialize routes out of node */
    1123           7 :                 NODE_INIT_PARENT(tn, tp);
    1124           7 :                 put_child(tn, get_index(key, tn) ^ 1, n);
    1125             : 
    1126             :                 /* start adding routes into the node */
    1127           7 :                 put_child_root(tp, key, tn);
    1128           7 :                 node_set_parent(n, tn);
    1129             : 
    1130             :                 /* parent now has a NULL spot where the leaf can go */
    1131           7 :                 tp = tn;
    1132             :         }
    1133             : 
    1134             :         /* Case 3: n is NULL, and will just insert a new leaf */
    1135           8 :         node_push_suffix(tp, new->fa_slen);
    1136           8 :         NODE_INIT_PARENT(l, tp);
    1137           8 :         put_child_root(tp, key, l);
    1138           8 :         trie_rebalance(t, tp);
    1139             : 
    1140           8 :         return 0;
    1141           0 : notnode:
    1142           0 :         node_free(l);
    1143             : noleaf:
    1144             :         return -ENOMEM;
    1145             : }
    1146             : 
    1147          10 : static int fib_insert_alias(struct trie *t, struct key_vector *tp,
    1148             :                             struct key_vector *l, struct fib_alias *new,
    1149             :                             struct fib_alias *fa, t_key key)
    1150             : {
    1151          10 :         if (!l)
    1152           8 :                 return fib_insert_node(t, tp, new, key);
    1153             : 
    1154           2 :         if (fa) {
    1155           0 :                 hlist_add_before_rcu(&new->fa_list, &fa->fa_list);
    1156             :         } else {
    1157           2 :                 struct fib_alias *last;
    1158             : 
    1159           4 :                 hlist_for_each_entry(last, &l->leaf, fa_list) {
    1160           2 :                         if (new->fa_slen < last->fa_slen)
    1161             :                                 break;
    1162           0 :                         if ((new->fa_slen == last->fa_slen) &&
    1163           0 :                             (new->tb_id > last->tb_id))
    1164             :                                 break;
    1165           0 :                         fa = last;
    1166             :                 }
    1167             : 
    1168           2 :                 if (fa)
    1169           0 :                         hlist_add_behind_rcu(&new->fa_list, &fa->fa_list);
    1170             :                 else
    1171           2 :                         hlist_add_head_rcu(&new->fa_list, &l->leaf);
    1172             :         }
    1173             : 
    1174             :         /* if we added to the tail node then we need to update slen */
    1175           2 :         if (l->slen < new->fa_slen) {
    1176           0 :                 l->slen = new->fa_slen;
    1177           0 :                 node_push_suffix(tp, new->fa_slen);
    1178             :         }
    1179             : 
    1180             :         return 0;
    1181             : }
    1182             : 
    1183          12 : static bool fib_valid_key_len(u32 key, u8 plen, struct netlink_ext_ack *extack)
    1184             : {
    1185          12 :         if (plen > KEYLENGTH) {
    1186           0 :                 NL_SET_ERR_MSG(extack, "Invalid prefix length");
    1187           0 :                 return false;
    1188             :         }
    1189             : 
    1190          12 :         if ((plen < KEYLENGTH) && (key << plen)) {
    1191           0 :                 NL_SET_ERR_MSG(extack,
    1192             :                                "Invalid prefix for given prefix length");
    1193           0 :                 return false;
    1194             :         }
    1195             : 
    1196             :         return true;
    1197             : }
    1198             : 
    1199             : static void fib_remove_alias(struct trie *t, struct key_vector *tp,
    1200             :                              struct key_vector *l, struct fib_alias *old);
    1201             : 
    1202             : /* Caller must hold RTNL. */
    1203          12 : int fib_table_insert(struct net *net, struct fib_table *tb,
    1204             :                      struct fib_config *cfg, struct netlink_ext_ack *extack)
    1205             : {
    1206          12 :         struct trie *t = (struct trie *)tb->tb_data;
    1207          12 :         struct fib_alias *fa, *new_fa;
    1208          12 :         struct key_vector *l, *tp;
    1209          12 :         u16 nlflags = NLM_F_EXCL;
    1210          12 :         struct fib_info *fi;
    1211          12 :         u8 plen = cfg->fc_dst_len;
    1212          12 :         u8 slen = KEYLENGTH - plen;
    1213          12 :         u8 tos = cfg->fc_tos;
    1214          12 :         u32 key;
    1215          12 :         int err;
    1216             : 
    1217          12 :         key = ntohl(cfg->fc_dst);
    1218             : 
    1219          12 :         if (!fib_valid_key_len(key, plen, extack))
    1220             :                 return -EINVAL;
    1221             : 
    1222          12 :         pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
    1223             : 
    1224          12 :         fi = fib_create_info(cfg, extack);
    1225          12 :         if (IS_ERR(fi)) {
    1226           0 :                 err = PTR_ERR(fi);
    1227           0 :                 goto err;
    1228             :         }
    1229             : 
    1230          12 :         l = fib_find_node(t, &tp, key);
    1231           4 :         fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority,
    1232          12 :                                 tb->tb_id, false) : NULL;
    1233             : 
    1234             :         /* Now fa, if non-NULL, points to the first fib alias
    1235             :          * with the same keys [prefix,tos,priority], if such key already
    1236             :          * exists or to the node before which we will insert new one.
    1237             :          *
    1238             :          * If fa is NULL, we will need to allocate a new one and
    1239             :          * insert to the tail of the section matching the suffix length
    1240             :          * of the new alias.
    1241             :          */
    1242             : 
    1243           4 :         if (fa && fa->fa_tos == tos &&
    1244           2 :             fa->fa_info->fib_priority == fi->fib_priority) {
    1245           2 :                 struct fib_alias *fa_first, *fa_match;
    1246             : 
    1247           2 :                 err = -EEXIST;
    1248           2 :                 if (cfg->fc_nlflags & NLM_F_EXCL)
    1249           0 :                         goto out;
    1250             : 
    1251           4 :                 nlflags &= ~NLM_F_EXCL;
    1252             : 
    1253             :                 /* We have 2 goals:
    1254             :                  * 1. Find exact match for type, scope, fib_info to avoid
    1255             :                  * duplicate routes
    1256             :                  * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
    1257             :                  */
    1258           2 :                 fa_match = NULL;
    1259           4 :                 fa_first = fa;
    1260           2 :                 hlist_for_each_entry_from(fa, fa_list) {
    1261           2 :                         if ((fa->fa_slen != slen) ||
    1262           2 :                             (fa->tb_id != tb->tb_id) ||
    1263           2 :                             (fa->fa_tos != tos))
    1264             :                                 break;
    1265           2 :                         if (fa->fa_info->fib_priority != fi->fib_priority)
    1266             :                                 break;
    1267           2 :                         if (fa->fa_type == cfg->fc_type &&
    1268             :                             fa->fa_info == fi) {
    1269             :                                 fa_match = fa;
    1270             :                                 break;
    1271             :                         }
    1272             :                 }
    1273             : 
    1274           2 :                 if (cfg->fc_nlflags & NLM_F_REPLACE) {
    1275           0 :                         struct fib_info *fi_drop;
    1276           0 :                         u8 state;
    1277             : 
    1278           0 :                         nlflags |= NLM_F_REPLACE;
    1279           0 :                         fa = fa_first;
    1280           0 :                         if (fa_match) {
    1281           0 :                                 if (fa == fa_match)
    1282           0 :                                         err = 0;
    1283           0 :                                 goto out;
    1284             :                         }
    1285           0 :                         err = -ENOBUFS;
    1286           0 :                         new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
    1287           0 :                         if (!new_fa)
    1288           0 :                                 goto out;
    1289             : 
    1290           0 :                         fi_drop = fa->fa_info;
    1291           0 :                         new_fa->fa_tos = fa->fa_tos;
    1292           0 :                         new_fa->fa_info = fi;
    1293           0 :                         new_fa->fa_type = cfg->fc_type;
    1294           0 :                         state = fa->fa_state;
    1295           0 :                         new_fa->fa_state = state & ~FA_S_ACCESSED;
    1296           0 :                         new_fa->fa_slen = fa->fa_slen;
    1297           0 :                         new_fa->tb_id = tb->tb_id;
    1298           0 :                         new_fa->fa_default = -1;
    1299           0 :                         new_fa->offload = 0;
    1300           0 :                         new_fa->trap = 0;
    1301           0 :                         new_fa->offload_failed = 0;
    1302             : 
    1303           0 :                         hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
    1304             : 
    1305           0 :                         if (fib_find_alias(&l->leaf, fa->fa_slen, 0, 0,
    1306             :                                            tb->tb_id, true) == new_fa) {
    1307           0 :                                 enum fib_event_type fib_event;
    1308             : 
    1309           0 :                                 fib_event = FIB_EVENT_ENTRY_REPLACE;
    1310           0 :                                 err = call_fib_entry_notifiers(net, fib_event,
    1311             :                                                                key, plen,
    1312             :                                                                new_fa, extack);
    1313           0 :                                 if (err) {
    1314           0 :                                         hlist_replace_rcu(&new_fa->fa_list,
    1315             :                                                           &fa->fa_list);
    1316           0 :                                         goto out_free_new_fa;
    1317             :                                 }
    1318             :                         }
    1319             : 
    1320           0 :                         rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
    1321           0 :                                   tb->tb_id, &cfg->fc_nlinfo, nlflags);
    1322             : 
    1323           0 :                         alias_free_mem_rcu(fa);
    1324             : 
    1325           0 :                         fib_release_info(fi_drop);
    1326           0 :                         if (state & FA_S_ACCESSED)
    1327           0 :                                 rt_cache_flush(cfg->fc_nlinfo.nl_net);
    1328             : 
    1329           0 :                         goto succeeded;
    1330             :                 }
    1331             :                 /* Error if we find a perfect match which
    1332             :                  * uses the same scope, type, and nexthop
    1333             :                  * information.
    1334             :                  */
    1335           2 :                 if (fa_match)
    1336           2 :                         goto out;
    1337             : 
    1338           0 :                 if (cfg->fc_nlflags & NLM_F_APPEND)
    1339             :                         nlflags |= NLM_F_APPEND;
    1340             :                 else
    1341           0 :                         fa = fa_first;
    1342             :         }
    1343          10 :         err = -ENOENT;
    1344          10 :         if (!(cfg->fc_nlflags & NLM_F_CREATE))
    1345           0 :                 goto out;
    1346             : 
    1347          10 :         nlflags |= NLM_F_CREATE;
    1348          10 :         err = -ENOBUFS;
    1349          10 :         new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
    1350          10 :         if (!new_fa)
    1351           0 :                 goto out;
    1352             : 
    1353          10 :         new_fa->fa_info = fi;
    1354          10 :         new_fa->fa_tos = tos;
    1355          10 :         new_fa->fa_type = cfg->fc_type;
    1356          10 :         new_fa->fa_state = 0;
    1357          10 :         new_fa->fa_slen = slen;
    1358          10 :         new_fa->tb_id = tb->tb_id;
    1359          10 :         new_fa->fa_default = -1;
    1360          10 :         new_fa->offload = 0;
    1361          10 :         new_fa->trap = 0;
    1362          10 :         new_fa->offload_failed = 0;
    1363             : 
    1364             :         /* Insert new entry to the list. */
    1365          10 :         err = fib_insert_alias(t, tp, l, new_fa, fa, key);
    1366          10 :         if (err)
    1367           0 :                 goto out_free_new_fa;
    1368             : 
    1369             :         /* The alias was already inserted, so the node must exist. */
    1370          10 :         l = l ? l : fib_find_node(t, &tp, key);
    1371          10 :         if (WARN_ON_ONCE(!l))
    1372           0 :                 goto out_free_new_fa;
    1373             : 
    1374          10 :         if (fib_find_alias(&l->leaf, new_fa->fa_slen, 0, 0, tb->tb_id, true) ==
    1375             :             new_fa) {
    1376          10 :                 enum fib_event_type fib_event;
    1377             : 
    1378          10 :                 fib_event = FIB_EVENT_ENTRY_REPLACE;
    1379          10 :                 err = call_fib_entry_notifiers(net, fib_event, key, plen,
    1380             :                                                new_fa, extack);
    1381          10 :                 if (err)
    1382           0 :                         goto out_remove_new_fa;
    1383             :         }
    1384             : 
    1385          10 :         if (!plen)
    1386           1 :                 tb->tb_num_default++;
    1387             : 
    1388          10 :         rt_cache_flush(cfg->fc_nlinfo.nl_net);
    1389          10 :         rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
    1390          10 :                   &cfg->fc_nlinfo, nlflags);
    1391             : succeeded:
    1392             :         return 0;
    1393             : 
    1394           0 : out_remove_new_fa:
    1395           0 :         fib_remove_alias(t, tp, l, new_fa);
    1396           0 : out_free_new_fa:
    1397           0 :         kmem_cache_free(fn_alias_kmem, new_fa);
    1398           2 : out:
    1399           2 :         fib_release_info(fi);
    1400             : err:
    1401             :         return err;
    1402             : }
    1403             : 
    1404          49 : static inline t_key prefix_mismatch(t_key key, struct key_vector *n)
    1405             : {
    1406          49 :         t_key prefix = n->key;
    1407             : 
    1408          49 :         return (key ^ prefix) & (prefix | -prefix);
    1409             : }
    1410             : 
    1411          67 : bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags,
    1412             :                          const struct flowi4 *flp)
    1413             : {
    1414          67 :         if (nhc->nhc_flags & RTNH_F_DEAD)
    1415             :                 return false;
    1416             : 
    1417          67 :         if (ip_ignore_linkdown(nhc->nhc_dev) &&
    1418           0 :             nhc->nhc_flags & RTNH_F_LINKDOWN &&
    1419           0 :             !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
    1420             :                 return false;
    1421             : 
    1422          67 :         if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
    1423          67 :                 if (flp->flowi4_oif &&
    1424           1 :                     flp->flowi4_oif != nhc->nhc_oif)
    1425           0 :                         return false;
    1426             :         }
    1427             : 
    1428             :         return true;
    1429             : }
    1430             : 
    1431             : /* should be called with rcu_read_lock */
    1432          87 : int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
    1433             :                      struct fib_result *res, int fib_flags)
    1434             : {
    1435          87 :         struct trie *t = (struct trie *) tb->tb_data;
    1436             : #ifdef CONFIG_IP_FIB_TRIE_STATS
    1437             :         struct trie_use_stats __percpu *stats = t->stats;
    1438             : #endif
    1439          87 :         const t_key key = ntohl(flp->daddr);
    1440          87 :         struct key_vector *n, *pn;
    1441          87 :         struct fib_alias *fa;
    1442          87 :         unsigned long index;
    1443          87 :         t_key cindex;
    1444             : 
    1445          87 :         pn = t->kv;
    1446          87 :         cindex = 0;
    1447             : 
    1448          87 :         n = get_child_rcu(pn, cindex);
    1449          87 :         if (!n) {
    1450           0 :                 trace_fib_table_lookup(tb->tb_id, flp, NULL, -EAGAIN);
    1451           0 :                 return -EAGAIN;
    1452             :         }
    1453             : 
    1454             : #ifdef CONFIG_IP_FIB_TRIE_STATS
    1455             :         this_cpu_inc(stats->gets);
    1456             : #endif
    1457             : 
    1458             :         /* Step 1: Travel to the longest prefix match in the trie */
    1459         330 :         for (;;) {
    1460         330 :                 index = get_cindex(key, n);
    1461             : 
    1462             :                 /* This bit of code is a bit tricky but it combines multiple
    1463             :                  * checks into a single check.  The prefix consists of the
    1464             :                  * prefix plus zeros for the "bits" in the prefix. The index
    1465             :                  * is the difference between the key and this value.  From
    1466             :                  * this we can actually derive several pieces of data.
    1467             :                  *   if (index >= (1ul << bits))
    1468             :                  *     we have a mismatch in skip bits and failed
    1469             :                  *   else
    1470             :                  *     we know the value is cindex
    1471             :                  *
    1472             :                  * This check is safe even if bits == KEYLENGTH due to the
    1473             :                  * fact that we can only allocate a node with 32 bits if a
    1474             :                  * long is greater than 32 bits.
    1475             :                  */
    1476         330 :                 if (index >= (1ul << n->bits))
    1477             :                         break;
    1478             : 
    1479             :                 /* we have found a leaf. Prefixes have already been compared */
    1480         281 :                 if (IS_LEAF(n))
    1481          38 :                         goto found;
    1482             : 
    1483             :                 /* only record pn and cindex if we are going to be chopping
    1484             :                  * bits later.  Otherwise we are just wasting cycles.
    1485             :                  */
    1486         243 :                 if (n->slen > n->pos) {
    1487         237 :                         pn = n;
    1488         237 :                         cindex = index;
    1489             :                 }
    1490             : 
    1491         243 :                 n = get_child_rcu(n, index);
    1492         243 :                 if (unlikely(!n))
    1493           0 :                         goto backtrace;
    1494             :         }
    1495             : 
    1496             :         /* Step 2: Sort out leaves and begin backtracing for longest prefix */
    1497          49 :         for (;;) {
    1498             :                 /* record the pointer where our next node pointer is stored */
    1499          49 :                 struct key_vector __rcu **cptr = n->tnode;
    1500             : 
    1501             :                 /* This test verifies that none of the bits that differ
    1502             :                  * between the key and the prefix exist in the region of
    1503             :                  * the lsb and higher in the prefix.
    1504             :                  */
    1505          49 :                 if (unlikely(prefix_mismatch(key, n)) || (n->slen == n->pos))
    1506          20 :                         goto backtrace;
    1507             : 
    1508             :                 /* exit out and process leaf */
    1509          29 :                 if (unlikely(IS_LEAF(n)))
    1510             :                         break;
    1511             : 
    1512             :                 /* Don't bother recording parent info.  Since we are in
    1513             :                  * prefix match mode we will have to come back to wherever
    1514             :                  * we started this traversal anyway
    1515             :                  */
    1516             : 
    1517           0 :                 while ((n = rcu_dereference(*cptr)) == NULL) {
    1518           0 : backtrace:
    1519             : #ifdef CONFIG_IP_FIB_TRIE_STATS
    1520             :                         if (!n)
    1521             :                                 this_cpu_inc(stats->null_node_hit);
    1522             : #endif
    1523             :                         /* If we are at cindex 0 there are no more bits for
    1524             :                          * us to strip at this level so we must ascend back
    1525             :                          * up one level to see if there are any more bits to
    1526             :                          * be stripped there.
    1527             :                          */
    1528          20 :                         while (!cindex) {
    1529          20 :                                 t_key pkey = pn->key;
    1530             : 
    1531             :                                 /* If we don't have a parent then there is
    1532             :                                  * nothing for us to do as we do not have any
    1533             :                                  * further nodes to parse.
    1534             :                                  */
    1535          20 :                                 if (IS_TRIE(pn)) {
    1536          20 :                                         trace_fib_table_lookup(tb->tb_id, flp,
    1537             :                                                                NULL, -EAGAIN);
    1538          20 :                                         return -EAGAIN;
    1539             :                                 }
    1540             : #ifdef CONFIG_IP_FIB_TRIE_STATS
    1541             :                                 this_cpu_inc(stats->backtrack);
    1542             : #endif
    1543             :                                 /* Get Child's index */
    1544           0 :                                 pn = node_parent_rcu(pn);
    1545           0 :                                 cindex = get_index(pkey, pn);
    1546             :                         }
    1547             : 
    1548             :                         /* strip the least significant bit from the cindex */
    1549           0 :                         cindex &= cindex - 1;
    1550             : 
    1551             :                         /* grab pointer for next child node */
    1552           0 :                         cptr = &pn->tnode[cindex];
    1553             :                 }
    1554             :         }
    1555             : 
    1556          29 : found:
    1557             :         /* this line carries forward the xor from earlier in the function */
    1558          67 :         index = key ^ n->key;
    1559             : 
    1560             :         /* Step 3: Process the leaf, if that fails fall back to backtracing */
    1561         163 :         hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
    1562          96 :                 struct fib_info *fi = fa->fa_info;
    1563          96 :                 struct fib_nh_common *nhc;
    1564          96 :                 int nhsel, err;
    1565             : 
    1566          96 :                 if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) {
    1567          96 :                         if (index >= (1ul << fa->fa_slen))
    1568          29 :                                 continue;
    1569             :                 }
    1570          67 :                 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
    1571           0 :                         continue;
    1572          67 :                 if (fi->fib_dead)
    1573           0 :                         continue;
    1574          67 :                 if (fa->fa_info->fib_scope < flp->flowi4_scope)
    1575           0 :                         continue;
    1576          67 :                 fib_alias_accessed(fa);
    1577          67 :                 err = fib_props[fa->fa_type].error;
    1578          67 :                 if (unlikely(err < 0)) {
    1579           0 : out_reject:
    1580             : #ifdef CONFIG_IP_FIB_TRIE_STATS
    1581             :                         this_cpu_inc(stats->semantic_match_passed);
    1582             : #endif
    1583           0 :                         trace_fib_table_lookup(tb->tb_id, flp, NULL, err);
    1584          67 :                         return err;
    1585             :                 }
    1586          67 :                 if (fi->fib_flags & RTNH_F_DEAD)
    1587           0 :                         continue;
    1588             : 
    1589          67 :                 if (unlikely(fi->nh)) {
    1590           0 :                         if (nexthop_is_blackhole(fi->nh)) {
    1591           0 :                                 err = fib_props[RTN_BLACKHOLE].error;
    1592           0 :                                 goto out_reject;
    1593             :                         }
    1594             : 
    1595           0 :                         nhc = nexthop_get_nhc_lookup(fi->nh, fib_flags, flp,
    1596             :                                                      &nhsel);
    1597           0 :                         if (nhc)
    1598           0 :                                 goto set_result;
    1599           0 :                         goto miss;
    1600             :                 }
    1601             : 
    1602         134 :                 for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) {
    1603          67 :                         nhc = fib_info_nhc(fi, nhsel);
    1604             : 
    1605          67 :                         if (!fib_lookup_good_nhc(nhc, fib_flags, flp))
    1606           0 :                                 continue;
    1607          67 : set_result:
    1608          67 :                         if (!(fib_flags & FIB_LOOKUP_NOREF))
    1609           0 :                                 refcount_inc(&fi->fib_clntref);
    1610             : 
    1611          67 :                         res->prefix = htonl(n->key);
    1612          67 :                         res->prefixlen = KEYLENGTH - fa->fa_slen;
    1613          67 :                         res->nh_sel = nhsel;
    1614          67 :                         res->nhc = nhc;
    1615          67 :                         res->type = fa->fa_type;
    1616          67 :                         res->scope = fi->fib_scope;
    1617          67 :                         res->fi = fi;
    1618          67 :                         res->table = tb;
    1619          67 :                         res->fa_head = &n->leaf;
    1620             : #ifdef CONFIG_IP_FIB_TRIE_STATS
    1621             :                         this_cpu_inc(stats->semantic_match_passed);
    1622             : #endif
    1623          67 :                         trace_fib_table_lookup(tb->tb_id, flp, nhc, err);
    1624             : 
    1625          67 :                         return err;
    1626             :                 }
    1627             :         }
    1628           0 : miss:
    1629             : #ifdef CONFIG_IP_FIB_TRIE_STATS
    1630             :         this_cpu_inc(stats->semantic_match_miss);
    1631             : #endif
    1632           0 :         goto backtrace;
    1633             : }
    1634             : EXPORT_SYMBOL_GPL(fib_table_lookup);
    1635             : 
    1636           0 : static void fib_remove_alias(struct trie *t, struct key_vector *tp,
    1637             :                              struct key_vector *l, struct fib_alias *old)
    1638             : {
    1639             :         /* record the location of the previous list_info entry */
    1640           0 :         struct hlist_node **pprev = old->fa_list.pprev;
    1641           0 :         struct fib_alias *fa = hlist_entry(pprev, typeof(*fa), fa_list.next);
    1642             : 
    1643             :         /* remove the fib_alias from the list */
    1644           0 :         hlist_del_rcu(&old->fa_list);
    1645             : 
    1646             :         /* if we emptied the list this leaf will be freed and we can sort
    1647             :          * out parent suffix lengths as a part of trie_rebalance
    1648             :          */
    1649           0 :         if (hlist_empty(&l->leaf)) {
    1650           0 :                 if (tp->slen == l->slen)
    1651           0 :                         node_pull_suffix(tp, tp->pos);
    1652           0 :                 put_child_root(tp, l->key, NULL);
    1653           0 :                 node_free(l);
    1654           0 :                 trie_rebalance(t, tp);
    1655           0 :                 return;
    1656             :         }
    1657             : 
    1658             :         /* only access fa if it is pointing at the last valid hlist_node */
    1659           0 :         if (*pprev)
    1660             :                 return;
    1661             : 
    1662             :         /* update the trie with the latest suffix length */
    1663           0 :         l->slen = fa->fa_slen;
    1664           0 :         node_pull_suffix(tp, fa->fa_slen);
    1665             : }
    1666             : 
    1667           0 : static void fib_notify_alias_delete(struct net *net, u32 key,
    1668             :                                     struct hlist_head *fah,
    1669             :                                     struct fib_alias *fa_to_delete,
    1670             :                                     struct netlink_ext_ack *extack)
    1671             : {
    1672           0 :         struct fib_alias *fa_next, *fa_to_notify;
    1673           0 :         u32 tb_id = fa_to_delete->tb_id;
    1674           0 :         u8 slen = fa_to_delete->fa_slen;
    1675           0 :         enum fib_event_type fib_event;
    1676             : 
    1677             :         /* Do not notify if we do not care about the route. */
    1678           0 :         if (fib_find_alias(fah, slen, 0, 0, tb_id, true) != fa_to_delete)
    1679             :                 return;
    1680             : 
    1681             :         /* Determine if the route should be replaced by the next route in the
    1682             :          * list.
    1683             :          */
    1684           0 :         fa_next = hlist_entry_safe(fa_to_delete->fa_list.next,
    1685             :                                    struct fib_alias, fa_list);
    1686           0 :         if (fa_next && fa_next->fa_slen == slen && fa_next->tb_id == tb_id) {
    1687             :                 fib_event = FIB_EVENT_ENTRY_REPLACE;
    1688             :                 fa_to_notify = fa_next;
    1689             :         } else {
    1690           0 :                 fib_event = FIB_EVENT_ENTRY_DEL;
    1691           0 :                 fa_to_notify = fa_to_delete;
    1692             :         }
    1693           0 :         call_fib_entry_notifiers(net, fib_event, key, KEYLENGTH - slen,
    1694             :                                  fa_to_notify, extack);
    1695             : }
    1696             : 
    1697             : /* Caller must hold RTNL. */
    1698           0 : int fib_table_delete(struct net *net, struct fib_table *tb,
    1699             :                      struct fib_config *cfg, struct netlink_ext_ack *extack)
    1700             : {
    1701           0 :         struct trie *t = (struct trie *) tb->tb_data;
    1702           0 :         struct fib_alias *fa, *fa_to_delete;
    1703           0 :         struct key_vector *l, *tp;
    1704           0 :         u8 plen = cfg->fc_dst_len;
    1705           0 :         u8 slen = KEYLENGTH - plen;
    1706           0 :         u8 tos = cfg->fc_tos;
    1707           0 :         u32 key;
    1708             : 
    1709           0 :         key = ntohl(cfg->fc_dst);
    1710             : 
    1711           0 :         if (!fib_valid_key_len(key, plen, extack))
    1712             :                 return -EINVAL;
    1713             : 
    1714           0 :         l = fib_find_node(t, &tp, key);
    1715           0 :         if (!l)
    1716             :                 return -ESRCH;
    1717             : 
    1718           0 :         fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id, false);
    1719           0 :         if (!fa)
    1720             :                 return -ESRCH;
    1721             : 
    1722             :         pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
    1723             : 
    1724           0 :         fa_to_delete = NULL;
    1725           0 :         hlist_for_each_entry_from(fa, fa_list) {
    1726           0 :                 struct fib_info *fi = fa->fa_info;
    1727             : 
    1728           0 :                 if ((fa->fa_slen != slen) ||
    1729           0 :                     (fa->tb_id != tb->tb_id) ||
    1730           0 :                     (fa->fa_tos != tos))
    1731             :                         break;
    1732             : 
    1733           0 :                 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
    1734           0 :                     (cfg->fc_scope == RT_SCOPE_NOWHERE ||
    1735           0 :                      fa->fa_info->fib_scope == cfg->fc_scope) &&
    1736           0 :                     (!cfg->fc_prefsrc ||
    1737           0 :                      fi->fib_prefsrc == cfg->fc_prefsrc) &&
    1738           0 :                     (!cfg->fc_protocol ||
    1739           0 :                      fi->fib_protocol == cfg->fc_protocol) &&
    1740           0 :                     fib_nh_match(net, cfg, fi, extack) == 0 &&
    1741           0 :                     fib_metrics_match(cfg, fi)) {
    1742             :                         fa_to_delete = fa;
    1743             :                         break;
    1744             :                 }
    1745             :         }
    1746             : 
    1747           0 :         if (!fa_to_delete)
    1748             :                 return -ESRCH;
    1749             : 
    1750           0 :         fib_notify_alias_delete(net, key, &l->leaf, fa_to_delete, extack);
    1751           0 :         rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
    1752           0 :                   &cfg->fc_nlinfo, 0);
    1753             : 
    1754           0 :         if (!plen)
    1755           0 :                 tb->tb_num_default--;
    1756             : 
    1757           0 :         fib_remove_alias(t, tp, l, fa_to_delete);
    1758             : 
    1759           0 :         if (fa_to_delete->fa_state & FA_S_ACCESSED)
    1760           0 :                 rt_cache_flush(cfg->fc_nlinfo.nl_net);
    1761             : 
    1762           0 :         fib_release_info(fa_to_delete->fa_info);
    1763           0 :         alias_free_mem_rcu(fa_to_delete);
    1764           0 :         return 0;
    1765             : }
    1766             : 
    1767             : /* Scan for the next leaf starting at the provided key value */
    1768          16 : static struct key_vector *leaf_walk_rcu(struct key_vector **tn, t_key key)
    1769             : {
    1770          16 :         struct key_vector *pn, *n = *tn;
    1771          20 :         unsigned long cindex;
    1772             : 
    1773             :         /* this loop is meant to try and find the key in the trie */
    1774          20 :         do {
    1775             :                 /* record parent and next child index */
    1776          20 :                 pn = n;
    1777          20 :                 cindex = (key > pn->key) ? get_index(key, pn) : 0;
    1778             : 
    1779          20 :                 if (cindex >> pn->bits)
    1780             :                         break;
    1781             : 
    1782             :                 /* descend into the next child */
    1783          12 :                 n = get_child_rcu(pn, cindex++);
    1784          12 :                 if (!n)
    1785             :                         break;
    1786             : 
    1787             :                 /* guarantee forward progress on the keys */
    1788          12 :                 if (IS_LEAF(n) && (n->key >= key))
    1789           4 :                         goto found;
    1790           8 :         } while (IS_TNODE(n));
    1791             : 
    1792             :         /* this loop will search for the next leaf with a greater key */
    1793          52 :         while (!IS_TRIE(pn)) {
    1794             :                 /* if we exhausted the parent node we will need to climb */
    1795          50 :                 if (cindex >= (1ul << pn->bits)) {
    1796          12 :                         t_key pkey = pn->key;
    1797             : 
    1798          12 :                         pn = node_parent_rcu(pn);
    1799          12 :                         cindex = get_index(pkey, pn) + 1;
    1800          12 :                         continue;
    1801             :                 }
    1802             : 
    1803             :                 /* grab the next available node */
    1804          38 :                 n = get_child_rcu(pn, cindex++);
    1805          38 :                 if (!n)
    1806          20 :                         continue;
    1807             : 
    1808             :                 /* no need to compare keys since we bumped the index */
    1809          18 :                 if (IS_LEAF(n))
    1810          10 :                         goto found;
    1811             : 
    1812             :                 /* Rescan start scanning in new node */
    1813             :                 pn = n;
    1814             :                 cindex = 0;
    1815             :         }
    1816             : 
    1817           2 :         *tn = pn;
    1818           2 :         return NULL; /* Root of trie */
    1819          14 : found:
    1820             :         /* if we are at the limit for keys just return NULL for the tnode */
    1821          14 :         *tn = pn;
    1822          14 :         return n;
    1823             : }
    1824             : 
    1825           0 : static void fib_trie_free(struct fib_table *tb)
    1826             : {
    1827           0 :         struct trie *t = (struct trie *)tb->tb_data;
    1828           0 :         struct key_vector *pn = t->kv;
    1829           0 :         unsigned long cindex = 1;
    1830           0 :         struct hlist_node *tmp;
    1831           0 :         struct fib_alias *fa;
    1832             : 
    1833             :         /* walk trie in reverse order and free everything */
    1834           0 :         for (;;) {
    1835           0 :                 struct key_vector *n;
    1836             : 
    1837           0 :                 if (!(cindex--)) {
    1838           0 :                         t_key pkey = pn->key;
    1839             : 
    1840           0 :                         if (IS_TRIE(pn))
    1841             :                                 break;
    1842             : 
    1843           0 :                         n = pn;
    1844           0 :                         pn = node_parent(pn);
    1845             : 
    1846             :                         /* drop emptied tnode */
    1847           0 :                         put_child_root(pn, n->key, NULL);
    1848           0 :                         node_free(n);
    1849             : 
    1850           0 :                         cindex = get_index(pkey, pn);
    1851             : 
    1852           0 :                         continue;
    1853             :                 }
    1854             : 
    1855             :                 /* grab the next available node */
    1856           0 :                 n = get_child(pn, cindex);
    1857           0 :                 if (!n)
    1858           0 :                         continue;
    1859             : 
    1860           0 :                 if (IS_TNODE(n)) {
    1861             :                         /* record pn and cindex for leaf walking */
    1862           0 :                         pn = n;
    1863           0 :                         cindex = 1ul << n->bits;
    1864             : 
    1865           0 :                         continue;
    1866             :                 }
    1867             : 
    1868           0 :                 hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
    1869           0 :                         hlist_del_rcu(&fa->fa_list);
    1870           0 :                         alias_free_mem_rcu(fa);
    1871             :                 }
    1872             : 
    1873           0 :                 put_child_root(pn, n->key, NULL);
    1874           0 :                 node_free(n);
    1875             :         }
    1876             : 
    1877             : #ifdef CONFIG_IP_FIB_TRIE_STATS
    1878             :         free_percpu(t->stats);
    1879             : #endif
    1880           0 :         kfree(tb);
    1881           0 : }
    1882             : 
    1883           0 : struct fib_table *fib_trie_unmerge(struct fib_table *oldtb)
    1884             : {
    1885           0 :         struct trie *ot = (struct trie *)oldtb->tb_data;
    1886           0 :         struct key_vector *l, *tp = ot->kv;
    1887           0 :         struct fib_table *local_tb;
    1888           0 :         struct fib_alias *fa;
    1889           0 :         struct trie *lt;
    1890           0 :         t_key key = 0;
    1891             : 
    1892           0 :         if (oldtb->tb_data == oldtb->__data)
    1893             :                 return oldtb;
    1894             : 
    1895           0 :         local_tb = fib_trie_table(RT_TABLE_LOCAL, NULL);
    1896           0 :         if (!local_tb)
    1897             :                 return NULL;
    1898             : 
    1899           0 :         lt = (struct trie *)local_tb->tb_data;
    1900             : 
    1901           0 :         while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
    1902           0 :                 struct key_vector *local_l = NULL, *local_tp;
    1903             : 
    1904           0 :                 hlist_for_each_entry(fa, &l->leaf, fa_list) {
    1905           0 :                         struct fib_alias *new_fa;
    1906             : 
    1907           0 :                         if (local_tb->tb_id != fa->tb_id)
    1908           0 :                                 continue;
    1909             : 
    1910             :                         /* clone fa for new local table */
    1911           0 :                         new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
    1912           0 :                         if (!new_fa)
    1913           0 :                                 goto out;
    1914             : 
    1915           0 :                         memcpy(new_fa, fa, sizeof(*fa));
    1916             : 
    1917             :                         /* insert clone into table */
    1918           0 :                         if (!local_l)
    1919           0 :                                 local_l = fib_find_node(lt, &local_tp, l->key);
    1920             : 
    1921           0 :                         if (fib_insert_alias(lt, local_tp, local_l, new_fa,
    1922             :                                              NULL, l->key)) {
    1923           0 :                                 kmem_cache_free(fn_alias_kmem, new_fa);
    1924           0 :                                 goto out;
    1925             :                         }
    1926             :                 }
    1927             : 
    1928             :                 /* stop loop if key wrapped back to 0 */
    1929           0 :                 key = l->key + 1;
    1930           0 :                 if (key < l->key)
    1931             :                         break;
    1932             :         }
    1933             : 
    1934             :         return local_tb;
    1935           0 : out:
    1936           0 :         fib_trie_free(local_tb);
    1937             : 
    1938           0 :         return NULL;
    1939             : }
    1940             : 
    1941             : /* Caller must hold RTNL */
    1942           0 : void fib_table_flush_external(struct fib_table *tb)
    1943             : {
    1944           0 :         struct trie *t = (struct trie *)tb->tb_data;
    1945           0 :         struct key_vector *pn = t->kv;
    1946           0 :         unsigned long cindex = 1;
    1947           0 :         struct hlist_node *tmp;
    1948           0 :         struct fib_alias *fa;
    1949             : 
    1950             :         /* walk trie in reverse order */
    1951           0 :         for (;;) {
    1952           0 :                 unsigned char slen = 0;
    1953           0 :                 struct key_vector *n;
    1954             : 
    1955           0 :                 if (!(cindex--)) {
    1956           0 :                         t_key pkey = pn->key;
    1957             : 
    1958             :                         /* cannot resize the trie vector */
    1959           0 :                         if (IS_TRIE(pn))
    1960             :                                 break;
    1961             : 
    1962             :                         /* update the suffix to address pulled leaves */
    1963           0 :                         if (pn->slen > pn->pos)
    1964           0 :                                 update_suffix(pn);
    1965             : 
    1966             :                         /* resize completed node */
    1967           0 :                         pn = resize(t, pn);
    1968           0 :                         cindex = get_index(pkey, pn);
    1969             : 
    1970           0 :                         continue;
    1971             :                 }
    1972             : 
    1973             :                 /* grab the next available node */
    1974           0 :                 n = get_child(pn, cindex);
    1975           0 :                 if (!n)
    1976           0 :                         continue;
    1977             : 
    1978           0 :                 if (IS_TNODE(n)) {
    1979             :                         /* record pn and cindex for leaf walking */
    1980           0 :                         pn = n;
    1981           0 :                         cindex = 1ul << n->bits;
    1982             : 
    1983           0 :                         continue;
    1984             :                 }
    1985             : 
    1986           0 :                 hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
    1987             :                         /* if alias was cloned to local then we just
    1988             :                          * need to remove the local copy from main
    1989             :                          */
    1990           0 :                         if (tb->tb_id != fa->tb_id) {
    1991           0 :                                 hlist_del_rcu(&fa->fa_list);
    1992           0 :                                 alias_free_mem_rcu(fa);
    1993           0 :                                 continue;
    1994             :                         }
    1995             : 
    1996             :                         /* record local slen */
    1997           0 :                         slen = fa->fa_slen;
    1998             :                 }
    1999             : 
    2000             :                 /* update leaf slen */
    2001           0 :                 n->slen = slen;
    2002             : 
    2003           0 :                 if (hlist_empty(&n->leaf)) {
    2004           0 :                         put_child_root(pn, n->key, NULL);
    2005           0 :                         node_free(n);
    2006             :                 }
    2007             :         }
    2008           0 : }
    2009             : 
    2010             : /* Caller must hold RTNL. */
    2011           0 : int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
    2012             : {
    2013           0 :         struct trie *t = (struct trie *)tb->tb_data;
    2014           0 :         struct key_vector *pn = t->kv;
    2015           0 :         unsigned long cindex = 1;
    2016           0 :         struct hlist_node *tmp;
    2017           0 :         struct fib_alias *fa;
    2018           0 :         int found = 0;
    2019             : 
    2020             :         /* walk trie in reverse order */
    2021           0 :         for (;;) {
    2022           0 :                 unsigned char slen = 0;
    2023           0 :                 struct key_vector *n;
    2024             : 
    2025           0 :                 if (!(cindex--)) {
    2026           0 :                         t_key pkey = pn->key;
    2027             : 
    2028             :                         /* cannot resize the trie vector */
    2029           0 :                         if (IS_TRIE(pn))
    2030             :                                 break;
    2031             : 
    2032             :                         /* update the suffix to address pulled leaves */
    2033           0 :                         if (pn->slen > pn->pos)
    2034           0 :                                 update_suffix(pn);
    2035             : 
    2036             :                         /* resize completed node */
    2037           0 :                         pn = resize(t, pn);
    2038           0 :                         cindex = get_index(pkey, pn);
    2039             : 
    2040           0 :                         continue;
    2041             :                 }
    2042             : 
    2043             :                 /* grab the next available node */
    2044           0 :                 n = get_child(pn, cindex);
    2045           0 :                 if (!n)
    2046           0 :                         continue;
    2047             : 
    2048           0 :                 if (IS_TNODE(n)) {
    2049             :                         /* record pn and cindex for leaf walking */
    2050           0 :                         pn = n;
    2051           0 :                         cindex = 1ul << n->bits;
    2052             : 
    2053           0 :                         continue;
    2054             :                 }
    2055             : 
    2056           0 :                 hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
    2057           0 :                         struct fib_info *fi = fa->fa_info;
    2058             : 
    2059           0 :                         if (!fi || tb->tb_id != fa->tb_id ||
    2060           0 :                             (!(fi->fib_flags & RTNH_F_DEAD) &&
    2061           0 :                              !fib_props[fa->fa_type].error)) {
    2062           0 :                                 slen = fa->fa_slen;
    2063           0 :                                 continue;
    2064             :                         }
    2065             : 
    2066             :                         /* Do not flush error routes if network namespace is
    2067             :                          * not being dismantled
    2068             :                          */
    2069           0 :                         if (!flush_all && fib_props[fa->fa_type].error) {
    2070           0 :                                 slen = fa->fa_slen;
    2071           0 :                                 continue;
    2072             :                         }
    2073             : 
    2074           0 :                         fib_notify_alias_delete(net, n->key, &n->leaf, fa,
    2075             :                                                 NULL);
    2076           0 :                         hlist_del_rcu(&fa->fa_list);
    2077           0 :                         fib_release_info(fa->fa_info);
    2078           0 :                         alias_free_mem_rcu(fa);
    2079           0 :                         found++;
    2080             :                 }
    2081             : 
    2082             :                 /* update leaf slen */
    2083           0 :                 n->slen = slen;
    2084             : 
    2085           0 :                 if (hlist_empty(&n->leaf)) {
    2086           0 :                         put_child_root(pn, n->key, NULL);
    2087           0 :                         node_free(n);
    2088             :                 }
    2089             :         }
    2090             : 
    2091           0 :         pr_debug("trie_flush found=%d\n", found);
    2092           0 :         return found;
    2093             : }
    2094             : 
    2095             : /* derived from fib_trie_free */
    2096           0 : static void __fib_info_notify_update(struct net *net, struct fib_table *tb,
    2097             :                                      struct nl_info *info)
    2098             : {
    2099           0 :         struct trie *t = (struct trie *)tb->tb_data;
    2100           0 :         struct key_vector *pn = t->kv;
    2101           0 :         unsigned long cindex = 1;
    2102           0 :         struct fib_alias *fa;
    2103             : 
    2104           0 :         for (;;) {
    2105           0 :                 struct key_vector *n;
    2106             : 
    2107           0 :                 if (!(cindex--)) {
    2108           0 :                         t_key pkey = pn->key;
    2109             : 
    2110           0 :                         if (IS_TRIE(pn))
    2111             :                                 break;
    2112             : 
    2113           0 :                         pn = node_parent(pn);
    2114           0 :                         cindex = get_index(pkey, pn);
    2115           0 :                         continue;
    2116             :                 }
    2117             : 
    2118             :                 /* grab the next available node */
    2119           0 :                 n = get_child(pn, cindex);
    2120           0 :                 if (!n)
    2121           0 :                         continue;
    2122             : 
    2123           0 :                 if (IS_TNODE(n)) {
    2124             :                         /* record pn and cindex for leaf walking */
    2125           0 :                         pn = n;
    2126           0 :                         cindex = 1ul << n->bits;
    2127             : 
    2128           0 :                         continue;
    2129             :                 }
    2130             : 
    2131           0 :                 hlist_for_each_entry(fa, &n->leaf, fa_list) {
    2132           0 :                         struct fib_info *fi = fa->fa_info;
    2133             : 
    2134           0 :                         if (!fi || !fi->nh_updated || fa->tb_id != tb->tb_id)
    2135           0 :                                 continue;
    2136             : 
    2137           0 :                         rtmsg_fib(RTM_NEWROUTE, htonl(n->key), fa,
    2138           0 :                                   KEYLENGTH - fa->fa_slen, tb->tb_id,
    2139             :                                   info, NLM_F_REPLACE);
    2140             :                 }
    2141             :         }
    2142           0 : }
    2143             : 
    2144           0 : void fib_info_notify_update(struct net *net, struct nl_info *info)
    2145             : {
    2146           0 :         unsigned int h;
    2147             : 
    2148           0 :         for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
    2149           0 :                 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
    2150           0 :                 struct fib_table *tb;
    2151             : 
    2152           0 :                 hlist_for_each_entry_rcu(tb, head, tb_hlist,
    2153             :                                          lockdep_rtnl_is_held())
    2154           0 :                         __fib_info_notify_update(net, tb, info);
    2155             :         }
    2156           0 : }
    2157             : 
    2158           0 : static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb,
    2159             :                            struct notifier_block *nb,
    2160             :                            struct netlink_ext_ack *extack)
    2161             : {
    2162           0 :         struct fib_alias *fa;
    2163           0 :         int last_slen = -1;
    2164           0 :         int err;
    2165             : 
    2166           0 :         hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
    2167           0 :                 struct fib_info *fi = fa->fa_info;
    2168             : 
    2169           0 :                 if (!fi)
    2170           0 :                         continue;
    2171             : 
    2172             :                 /* local and main table can share the same trie,
    2173             :                  * so don't notify twice for the same entry.
    2174             :                  */
    2175           0 :                 if (tb->tb_id != fa->tb_id)
    2176           0 :                         continue;
    2177             : 
    2178           0 :                 if (fa->fa_slen == last_slen)
    2179           0 :                         continue;
    2180             : 
    2181           0 :                 last_slen = fa->fa_slen;
    2182           0 :                 err = call_fib_entry_notifier(nb, FIB_EVENT_ENTRY_REPLACE,
    2183           0 :                                               l->key, KEYLENGTH - fa->fa_slen,
    2184             :                                               fa, extack);
    2185           0 :                 if (err)
    2186           0 :                         return err;
    2187             :         }
    2188             :         return 0;
    2189             : }
    2190             : 
    2191           0 : static int fib_table_notify(struct fib_table *tb, struct notifier_block *nb,
    2192             :                             struct netlink_ext_ack *extack)
    2193             : {
    2194           0 :         struct trie *t = (struct trie *)tb->tb_data;
    2195           0 :         struct key_vector *l, *tp = t->kv;
    2196           0 :         t_key key = 0;
    2197           0 :         int err;
    2198             : 
    2199           0 :         while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
    2200           0 :                 err = fib_leaf_notify(l, tb, nb, extack);
    2201           0 :                 if (err)
    2202           0 :                         return err;
    2203             : 
    2204           0 :                 key = l->key + 1;
    2205             :                 /* stop in case of wrap around */
    2206           0 :                 if (key < l->key)
    2207             :                         break;
    2208             :         }
    2209             :         return 0;
    2210             : }
    2211             : 
    2212           0 : int fib_notify(struct net *net, struct notifier_block *nb,
    2213             :                struct netlink_ext_ack *extack)
    2214             : {
    2215           0 :         unsigned int h;
    2216           0 :         int err;
    2217             : 
    2218           0 :         for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
    2219           0 :                 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
    2220           0 :                 struct fib_table *tb;
    2221             : 
    2222           0 :                 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
    2223           0 :                         err = fib_table_notify(tb, nb, extack);
    2224           0 :                         if (err)
    2225           0 :                                 return err;
    2226             :                 }
    2227             :         }
    2228             :         return 0;
    2229             : }
    2230             : 
    2231           0 : static void __trie_free_rcu(struct rcu_head *head)
    2232             : {
    2233           0 :         struct fib_table *tb = container_of(head, struct fib_table, rcu);
    2234             : #ifdef CONFIG_IP_FIB_TRIE_STATS
    2235             :         struct trie *t = (struct trie *)tb->tb_data;
    2236             : 
    2237             :         if (tb->tb_data == tb->__data)
    2238             :                 free_percpu(t->stats);
    2239             : #endif /* CONFIG_IP_FIB_TRIE_STATS */
    2240           0 :         kfree(tb);
    2241           0 : }
    2242             : 
    2243           0 : void fib_free_table(struct fib_table *tb)
    2244             : {
    2245           0 :         call_rcu(&tb->rcu, __trie_free_rcu);
    2246           0 : }
    2247             : 
    2248          14 : static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
    2249             :                              struct sk_buff *skb, struct netlink_callback *cb,
    2250             :                              struct fib_dump_filter *filter)
    2251             : {
    2252          14 :         unsigned int flags = NLM_F_MULTI;
    2253          14 :         __be32 xkey = htonl(l->key);
    2254          14 :         int i, s_i, i_fa, s_fa, err;
    2255          14 :         struct fib_alias *fa;
    2256             : 
    2257          14 :         if (filter->filter_set ||
    2258          14 :             !filter->dump_exceptions || !filter->dump_routes)
    2259           0 :                 flags |= NLM_F_DUMP_FILTERED;
    2260             : 
    2261          14 :         s_i = cb->args[4];
    2262          14 :         s_fa = cb->args[5];
    2263          14 :         i = 0;
    2264             : 
    2265             :         /* rcu_read_lock is hold by caller */
    2266          46 :         hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
    2267          18 :                 struct fib_info *fi = fa->fa_info;
    2268             : 
    2269          18 :                 if (i < s_i)
    2270           0 :                         goto next;
    2271             : 
    2272          18 :                 i_fa = 0;
    2273             : 
    2274          18 :                 if (tb->tb_id != fa->tb_id)
    2275           9 :                         goto next;
    2276             : 
    2277           9 :                 if (filter->filter_set) {
    2278           0 :                         if (filter->rt_type && fa->fa_type != filter->rt_type)
    2279           0 :                                 goto next;
    2280             : 
    2281           0 :                         if ((filter->protocol &&
    2282           0 :                              fi->fib_protocol != filter->protocol))
    2283           0 :                                 goto next;
    2284             : 
    2285           0 :                         if (filter->dev &&
    2286           0 :                             !fib_info_nh_uses_dev(fi, filter->dev))
    2287           0 :                                 goto next;
    2288             :                 }
    2289             : 
    2290           9 :                 if (filter->dump_routes) {
    2291           9 :                         if (!s_fa) {
    2292           9 :                                 struct fib_rt_info fri;
    2293             : 
    2294           9 :                                 fri.fi = fi;
    2295           9 :                                 fri.tb_id = tb->tb_id;
    2296           9 :                                 fri.dst = xkey;
    2297           9 :                                 fri.dst_len = KEYLENGTH - fa->fa_slen;
    2298           9 :                                 fri.tos = fa->fa_tos;
    2299           9 :                                 fri.type = fa->fa_type;
    2300           9 :                                 fri.offload = fa->offload;
    2301           9 :                                 fri.trap = fa->trap;
    2302           9 :                                 fri.offload_failed = fa->offload_failed;
    2303          18 :                                 err = fib_dump_info(skb,
    2304           9 :                                                     NETLINK_CB(cb->skb).portid,
    2305           9 :                                                     cb->nlh->nlmsg_seq,
    2306             :                                                     RTM_NEWROUTE, &fri, flags);
    2307           9 :                                 if (err < 0)
    2308           0 :                                         goto stop;
    2309             :                         }
    2310             : 
    2311           9 :                         i_fa++;
    2312             :                 }
    2313             : 
    2314           9 :                 if (filter->dump_exceptions) {
    2315           9 :                         err = fib_dump_info_fnhe(skb, cb, tb->tb_id, fi,
    2316             :                                                  &i_fa, s_fa, flags);
    2317           9 :                         if (err < 0)
    2318           0 :                                 goto stop;
    2319             :                 }
    2320             : 
    2321           9 : next:
    2322          18 :                 i++;
    2323             :         }
    2324             : 
    2325          14 :         cb->args[4] = i;
    2326          14 :         return skb->len;
    2327             : 
    2328           0 : stop:
    2329           0 :         cb->args[4] = i;
    2330           0 :         cb->args[5] = i_fa;
    2331           0 :         return err;
    2332             : }
    2333             : 
    2334             : /* rcu_read_lock needs to be hold by caller from readside */
    2335           2 : int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
    2336             :                    struct netlink_callback *cb, struct fib_dump_filter *filter)
    2337             : {
    2338           2 :         struct trie *t = (struct trie *)tb->tb_data;
    2339           2 :         struct key_vector *l, *tp = t->kv;
    2340             :         /* Dump starting at last key.
    2341             :          * Note: 0.0.0.0/0 (ie default) is first key.
    2342             :          */
    2343           2 :         int count = cb->args[2];
    2344           2 :         t_key key = cb->args[3];
    2345             : 
    2346             :         /* First time here, count and key are both always 0. Count > 0
    2347             :          * and key == 0 means the dump has wrapped around and we are done.
    2348             :          */
    2349           2 :         if (count && !key)
    2350           0 :                 return skb->len;
    2351             : 
    2352          16 :         while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
    2353          14 :                 int err;
    2354             : 
    2355          14 :                 err = fn_trie_dump_leaf(l, tb, skb, cb, filter);
    2356          14 :                 if (err < 0) {
    2357           0 :                         cb->args[3] = key;
    2358           0 :                         cb->args[2] = count;
    2359           0 :                         return err;
    2360             :                 }
    2361             : 
    2362          14 :                 ++count;
    2363          14 :                 key = l->key + 1;
    2364             : 
    2365          14 :                 memset(&cb->args[4], 0,
    2366             :                        sizeof(cb->args) - 4*sizeof(cb->args[0]));
    2367             : 
    2368             :                 /* stop loop if key wrapped back to 0 */
    2369          14 :                 if (key < l->key)
    2370             :                         break;
    2371             :         }
    2372             : 
    2373           2 :         cb->args[3] = key;
    2374           2 :         cb->args[2] = count;
    2375             : 
    2376           2 :         return skb->len;
    2377             : }
    2378             : 
    2379           1 : void __init fib_trie_init(void)
    2380             : {
    2381           1 :         fn_alias_kmem = kmem_cache_create("ip_fib_alias",
    2382             :                                           sizeof(struct fib_alias),
    2383             :                                           0, SLAB_PANIC, NULL);
    2384             : 
    2385           1 :         trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
    2386             :                                            LEAF_SIZE,
    2387             :                                            0, SLAB_PANIC, NULL);
    2388           1 : }
    2389             : 
    2390           2 : struct fib_table *fib_trie_table(u32 id, struct fib_table *alias)
    2391             : {
    2392           2 :         struct fib_table *tb;
    2393           2 :         struct trie *t;
    2394           2 :         size_t sz = sizeof(*tb);
    2395             : 
    2396           2 :         if (!alias)
    2397           1 :                 sz += sizeof(struct trie);
    2398             : 
    2399           2 :         tb = kzalloc(sz, GFP_KERNEL);
    2400           2 :         if (!tb)
    2401             :                 return NULL;
    2402             : 
    2403           2 :         tb->tb_id = id;
    2404           2 :         tb->tb_num_default = 0;
    2405           2 :         tb->tb_data = (alias ? alias->__data : tb->__data);
    2406             : 
    2407           2 :         if (alias)
    2408             :                 return tb;
    2409             : 
    2410           1 :         t = (struct trie *) tb->tb_data;
    2411           1 :         t->kv[0].pos = KEYLENGTH;
    2412           1 :         t->kv[0].slen = KEYLENGTH;
    2413             : #ifdef CONFIG_IP_FIB_TRIE_STATS
    2414             :         t->stats = alloc_percpu(struct trie_use_stats);
    2415             :         if (!t->stats) {
    2416             :                 kfree(tb);
    2417             :                 tb = NULL;
    2418             :         }
    2419             : #endif
    2420             : 
    2421           1 :         return tb;
    2422             : }
    2423             : 
    2424             : #ifdef CONFIG_PROC_FS
    2425             : /* Depth first Trie walk iterator */
    2426             : struct fib_trie_iter {
    2427             :         struct seq_net_private p;
    2428             :         struct fib_table *tb;
    2429             :         struct key_vector *tnode;
    2430             :         unsigned int index;
    2431             :         unsigned int depth;
    2432             : };
    2433             : 
    2434           0 : static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter)
    2435             : {
    2436           0 :         unsigned long cindex = iter->index;
    2437           0 :         struct key_vector *pn = iter->tnode;
    2438           0 :         t_key pkey;
    2439             : 
    2440           0 :         pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
    2441             :                  iter->tnode, iter->index, iter->depth);
    2442             : 
    2443           0 :         while (!IS_TRIE(pn)) {
    2444           0 :                 while (cindex < child_length(pn)) {
    2445           0 :                         struct key_vector *n = get_child_rcu(pn, cindex++);
    2446             : 
    2447           0 :                         if (!n)
    2448           0 :                                 continue;
    2449             : 
    2450           0 :                         if (IS_LEAF(n)) {
    2451           0 :                                 iter->tnode = pn;
    2452           0 :                                 iter->index = cindex;
    2453             :                         } else {
    2454             :                                 /* push down one level */
    2455           0 :                                 iter->tnode = n;
    2456           0 :                                 iter->index = 0;
    2457           0 :                                 ++iter->depth;
    2458             :                         }
    2459             : 
    2460             :                         return n;
    2461             :                 }
    2462             : 
    2463             :                 /* Current node exhausted, pop back up */
    2464           0 :                 pkey = pn->key;
    2465           0 :                 pn = node_parent_rcu(pn);
    2466           0 :                 cindex = get_index(pkey, pn) + 1;
    2467           0 :                 --iter->depth;
    2468             :         }
    2469             : 
    2470             :         /* record root node so further searches know we are done */
    2471           0 :         iter->tnode = pn;
    2472           0 :         iter->index = 0;
    2473             : 
    2474           0 :         return NULL;
    2475             : }
    2476             : 
    2477           0 : static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter,
    2478             :                                              struct trie *t)
    2479             : {
    2480           0 :         struct key_vector *n, *pn;
    2481             : 
    2482           0 :         if (!t)
    2483             :                 return NULL;
    2484             : 
    2485           0 :         pn = t->kv;
    2486           0 :         n = rcu_dereference(pn->tnode[0]);
    2487           0 :         if (!n)
    2488             :                 return NULL;
    2489             : 
    2490           0 :         if (IS_TNODE(n)) {
    2491           0 :                 iter->tnode = n;
    2492           0 :                 iter->index = 0;
    2493           0 :                 iter->depth = 1;
    2494             :         } else {
    2495           0 :                 iter->tnode = pn;
    2496           0 :                 iter->index = 0;
    2497           0 :                 iter->depth = 0;
    2498             :         }
    2499             : 
    2500             :         return n;
    2501             : }
    2502             : 
    2503           0 : static void trie_collect_stats(struct trie *t, struct trie_stat *s)
    2504             : {
    2505           0 :         struct key_vector *n;
    2506           0 :         struct fib_trie_iter iter;
    2507             : 
    2508           0 :         memset(s, 0, sizeof(*s));
    2509             : 
    2510           0 :         rcu_read_lock();
    2511           0 :         for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
    2512           0 :                 if (IS_LEAF(n)) {
    2513           0 :                         struct fib_alias *fa;
    2514             : 
    2515           0 :                         s->leaves++;
    2516           0 :                         s->totdepth += iter.depth;
    2517           0 :                         if (iter.depth > s->maxdepth)
    2518           0 :                                 s->maxdepth = iter.depth;
    2519             : 
    2520           0 :                         hlist_for_each_entry_rcu(fa, &n->leaf, fa_list)
    2521           0 :                                 ++s->prefixes;
    2522             :                 } else {
    2523           0 :                         s->tnodes++;
    2524           0 :                         if (n->bits < MAX_STAT_DEPTH)
    2525           0 :                                 s->nodesizes[n->bits]++;
    2526           0 :                         s->nullpointers += tn_info(n)->empty_children;
    2527             :                 }
    2528             :         }
    2529           0 :         rcu_read_unlock();
    2530           0 : }
    2531             : 
    2532             : /*
    2533             :  *      This outputs /proc/net/fib_triestats
    2534             :  */
    2535           0 : static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
    2536             : {
    2537           0 :         unsigned int i, max, pointers, bytes, avdepth;
    2538             : 
    2539           0 :         if (stat->leaves)
    2540           0 :                 avdepth = stat->totdepth*100 / stat->leaves;
    2541             :         else
    2542             :                 avdepth = 0;
    2543             : 
    2544           0 :         seq_printf(seq, "\tAver depth:     %u.%02d\n",
    2545             :                    avdepth / 100, avdepth % 100);
    2546           0 :         seq_printf(seq, "\tMax depth:      %u\n", stat->maxdepth);
    2547             : 
    2548           0 :         seq_printf(seq, "\tLeaves:         %u\n", stat->leaves);
    2549           0 :         bytes = LEAF_SIZE * stat->leaves;
    2550             : 
    2551           0 :         seq_printf(seq, "\tPrefixes:       %u\n", stat->prefixes);
    2552           0 :         bytes += sizeof(struct fib_alias) * stat->prefixes;
    2553             : 
    2554           0 :         seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
    2555           0 :         bytes += TNODE_SIZE(0) * stat->tnodes;
    2556             : 
    2557           0 :         max = MAX_STAT_DEPTH;
    2558           0 :         while (max > 0 && stat->nodesizes[max-1] == 0)
    2559             :                 max--;
    2560             : 
    2561             :         pointers = 0;
    2562           0 :         for (i = 1; i < max; i++)
    2563           0 :                 if (stat->nodesizes[i] != 0) {
    2564           0 :                         seq_printf(seq, "  %u: %u",  i, stat->nodesizes[i]);
    2565           0 :                         pointers += (1<<i) * stat->nodesizes[i];
    2566             :                 }
    2567           0 :         seq_putc(seq, '\n');
    2568           0 :         seq_printf(seq, "\tPointers: %u\n", pointers);
    2569             : 
    2570           0 :         bytes += sizeof(struct key_vector *) * pointers;
    2571           0 :         seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
    2572           0 :         seq_printf(seq, "Total size: %u  kB\n", (bytes + 1023) / 1024);
    2573           0 : }
    2574             : 
    2575             : #ifdef CONFIG_IP_FIB_TRIE_STATS
    2576             : static void trie_show_usage(struct seq_file *seq,
    2577             :                             const struct trie_use_stats __percpu *stats)
    2578             : {
    2579             :         struct trie_use_stats s = { 0 };
    2580             :         int cpu;
    2581             : 
    2582             :         /* loop through all of the CPUs and gather up the stats */
    2583             :         for_each_possible_cpu(cpu) {
    2584             :                 const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu);
    2585             : 
    2586             :                 s.gets += pcpu->gets;
    2587             :                 s.backtrack += pcpu->backtrack;
    2588             :                 s.semantic_match_passed += pcpu->semantic_match_passed;
    2589             :                 s.semantic_match_miss += pcpu->semantic_match_miss;
    2590             :                 s.null_node_hit += pcpu->null_node_hit;
    2591             :                 s.resize_node_skipped += pcpu->resize_node_skipped;
    2592             :         }
    2593             : 
    2594             :         seq_printf(seq, "\nCounters:\n---------\n");
    2595             :         seq_printf(seq, "gets = %u\n", s.gets);
    2596             :         seq_printf(seq, "backtracks = %u\n", s.backtrack);
    2597             :         seq_printf(seq, "semantic match passed = %u\n",
    2598             :                    s.semantic_match_passed);
    2599             :         seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss);
    2600             :         seq_printf(seq, "null node hit= %u\n", s.null_node_hit);
    2601             :         seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped);
    2602             : }
    2603             : #endif /*  CONFIG_IP_FIB_TRIE_STATS */
    2604             : 
    2605           0 : static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
    2606             : {
    2607           0 :         if (tb->tb_id == RT_TABLE_LOCAL)
    2608           0 :                 seq_puts(seq, "Local:\n");
    2609           0 :         else if (tb->tb_id == RT_TABLE_MAIN)
    2610           0 :                 seq_puts(seq, "Main:\n");
    2611             :         else
    2612           0 :                 seq_printf(seq, "Id %d:\n", tb->tb_id);
    2613           0 : }
    2614             : 
    2615             : 
    2616           0 : static int fib_triestat_seq_show(struct seq_file *seq, void *v)
    2617             : {
    2618           0 :         struct net *net = (struct net *)seq->private;
    2619           0 :         unsigned int h;
    2620             : 
    2621           0 :         seq_printf(seq,
    2622             :                    "Basic info: size of leaf:"
    2623             :                    " %zd bytes, size of tnode: %zd bytes.\n",
    2624             :                    LEAF_SIZE, TNODE_SIZE(0));
    2625             : 
    2626           0 :         rcu_read_lock();
    2627           0 :         for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
    2628           0 :                 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
    2629           0 :                 struct fib_table *tb;
    2630             : 
    2631           0 :                 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
    2632           0 :                         struct trie *t = (struct trie *) tb->tb_data;
    2633           0 :                         struct trie_stat stat;
    2634             : 
    2635           0 :                         if (!t)
    2636           0 :                                 continue;
    2637             : 
    2638           0 :                         fib_table_print(seq, tb);
    2639             : 
    2640           0 :                         trie_collect_stats(t, &stat);
    2641           0 :                         trie_show_stats(seq, &stat);
    2642             : #ifdef CONFIG_IP_FIB_TRIE_STATS
    2643             :                         trie_show_usage(seq, t->stats);
    2644             : #endif
    2645             :                 }
    2646           0 :                 cond_resched_rcu();
    2647             :         }
    2648           0 :         rcu_read_unlock();
    2649             : 
    2650           0 :         return 0;
    2651             : }
    2652             : 
    2653           0 : static struct key_vector *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
    2654             : {
    2655           0 :         struct fib_trie_iter *iter = seq->private;
    2656           0 :         struct net *net = seq_file_net(seq);
    2657           0 :         loff_t idx = 0;
    2658           0 :         unsigned int h;
    2659             : 
    2660           0 :         for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
    2661           0 :                 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
    2662           0 :                 struct fib_table *tb;
    2663             : 
    2664           0 :                 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
    2665           0 :                         struct key_vector *n;
    2666             : 
    2667           0 :                         for (n = fib_trie_get_first(iter,
    2668           0 :                                                     (struct trie *) tb->tb_data);
    2669           0 :                              n; n = fib_trie_get_next(iter))
    2670           0 :                                 if (pos == idx++) {
    2671           0 :                                         iter->tb = tb;
    2672           0 :                                         return n;
    2673             :                                 }
    2674             :                 }
    2675             :         }
    2676             : 
    2677             :         return NULL;
    2678             : }
    2679             : 
    2680           0 : static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
    2681             :         __acquires(RCU)
    2682             : {
    2683           0 :         rcu_read_lock();
    2684           0 :         return fib_trie_get_idx(seq, *pos);
    2685             : }
    2686             : 
    2687           0 : static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
    2688             : {
    2689           0 :         struct fib_trie_iter *iter = seq->private;
    2690           0 :         struct net *net = seq_file_net(seq);
    2691           0 :         struct fib_table *tb = iter->tb;
    2692           0 :         struct hlist_node *tb_node;
    2693           0 :         unsigned int h;
    2694           0 :         struct key_vector *n;
    2695             : 
    2696           0 :         ++*pos;
    2697             :         /* next node in same table */
    2698           0 :         n = fib_trie_get_next(iter);
    2699           0 :         if (n)
    2700             :                 return n;
    2701             : 
    2702             :         /* walk rest of this hash chain */
    2703           0 :         h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
    2704           0 :         while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) {
    2705           0 :                 tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
    2706           0 :                 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
    2707           0 :                 if (n)
    2708           0 :                         goto found;
    2709             :         }
    2710             : 
    2711             :         /* new hash chain */
    2712           0 :         while (++h < FIB_TABLE_HASHSZ) {
    2713           0 :                 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
    2714           0 :                 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
    2715           0 :                         n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
    2716           0 :                         if (n)
    2717           0 :                                 goto found;
    2718             :                 }
    2719             :         }
    2720             :         return NULL;
    2721             : 
    2722           0 : found:
    2723           0 :         iter->tb = tb;
    2724           0 :         return n;
    2725             : }
    2726             : 
    2727           0 : static void fib_trie_seq_stop(struct seq_file *seq, void *v)
    2728             :         __releases(RCU)
    2729             : {
    2730           0 :         rcu_read_unlock();
    2731           0 : }
    2732             : 
    2733           0 : static void seq_indent(struct seq_file *seq, int n)
    2734             : {
    2735           0 :         while (n-- > 0)
    2736           0 :                 seq_puts(seq, "   ");
    2737             : }
    2738             : 
    2739           0 : static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
    2740             : {
    2741           0 :         switch (s) {
    2742             :         case RT_SCOPE_UNIVERSE: return "universe";
    2743           0 :         case RT_SCOPE_SITE:     return "site";
    2744           0 :         case RT_SCOPE_LINK:     return "link";
    2745           0 :         case RT_SCOPE_HOST:     return "host";
    2746           0 :         case RT_SCOPE_NOWHERE:  return "nowhere";
    2747           0 :         default:
    2748           0 :                 snprintf(buf, len, "scope=%d", s);
    2749           0 :                 return buf;
    2750             :         }
    2751             : }
    2752             : 
    2753             : static const char *const rtn_type_names[__RTN_MAX] = {
    2754             :         [RTN_UNSPEC] = "UNSPEC",
    2755             :         [RTN_UNICAST] = "UNICAST",
    2756             :         [RTN_LOCAL] = "LOCAL",
    2757             :         [RTN_BROADCAST] = "BROADCAST",
    2758             :         [RTN_ANYCAST] = "ANYCAST",
    2759             :         [RTN_MULTICAST] = "MULTICAST",
    2760             :         [RTN_BLACKHOLE] = "BLACKHOLE",
    2761             :         [RTN_UNREACHABLE] = "UNREACHABLE",
    2762             :         [RTN_PROHIBIT] = "PROHIBIT",
    2763             :         [RTN_THROW] = "THROW",
    2764             :         [RTN_NAT] = "NAT",
    2765             :         [RTN_XRESOLVE] = "XRESOLVE",
    2766             : };
    2767             : 
    2768           0 : static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
    2769             : {
    2770           0 :         if (t < __RTN_MAX && rtn_type_names[t])
    2771             :                 return rtn_type_names[t];
    2772           0 :         snprintf(buf, len, "type %u", t);
    2773           0 :         return buf;
    2774             : }
    2775             : 
    2776             : /* Pretty print the trie */
    2777           0 : static int fib_trie_seq_show(struct seq_file *seq, void *v)
    2778             : {
    2779           0 :         const struct fib_trie_iter *iter = seq->private;
    2780           0 :         struct key_vector *n = v;
    2781             : 
    2782           0 :         if (IS_TRIE(node_parent_rcu(n)))
    2783           0 :                 fib_table_print(seq, iter->tb);
    2784             : 
    2785           0 :         if (IS_TNODE(n)) {
    2786           0 :                 __be32 prf = htonl(n->key);
    2787             : 
    2788           0 :                 seq_indent(seq, iter->depth-1);
    2789           0 :                 seq_printf(seq, "  +-- %pI4/%zu %u %u %u\n",
    2790           0 :                            &prf, KEYLENGTH - n->pos - n->bits, n->bits,
    2791           0 :                            tn_info(n)->full_children,
    2792           0 :                            tn_info(n)->empty_children);
    2793             :         } else {
    2794           0 :                 __be32 val = htonl(n->key);
    2795           0 :                 struct fib_alias *fa;
    2796             : 
    2797           0 :                 seq_indent(seq, iter->depth);
    2798           0 :                 seq_printf(seq, "  |-- %pI4\n", &val);
    2799             : 
    2800           0 :                 hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
    2801           0 :                         char buf1[32], buf2[32];
    2802             : 
    2803           0 :                         seq_indent(seq, iter->depth + 1);
    2804           0 :                         seq_printf(seq, "  /%zu %s %s",
    2805           0 :                                    KEYLENGTH - fa->fa_slen,
    2806             :                                    rtn_scope(buf1, sizeof(buf1),
    2807           0 :                                              fa->fa_info->fib_scope),
    2808             :                                    rtn_type(buf2, sizeof(buf2),
    2809           0 :                                             fa->fa_type));
    2810           0 :                         if (fa->fa_tos)
    2811           0 :                                 seq_printf(seq, " tos=%d", fa->fa_tos);
    2812           0 :                         seq_putc(seq, '\n');
    2813             :                 }
    2814             :         }
    2815             : 
    2816           0 :         return 0;
    2817             : }
    2818             : 
    2819             : static const struct seq_operations fib_trie_seq_ops = {
    2820             :         .start  = fib_trie_seq_start,
    2821             :         .next   = fib_trie_seq_next,
    2822             :         .stop   = fib_trie_seq_stop,
    2823             :         .show   = fib_trie_seq_show,
    2824             : };
    2825             : 
    2826             : struct fib_route_iter {
    2827             :         struct seq_net_private p;
    2828             :         struct fib_table *main_tb;
    2829             :         struct key_vector *tnode;
    2830             :         loff_t  pos;
    2831             :         t_key   key;
    2832             : };
    2833             : 
    2834           0 : static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
    2835             :                                             loff_t pos)
    2836             : {
    2837           0 :         struct key_vector *l, **tp = &iter->tnode;
    2838           0 :         t_key key;
    2839             : 
    2840             :         /* use cached location of previously found key */
    2841           0 :         if (iter->pos > 0 && pos >= iter->pos) {
    2842           0 :                 key = iter->key;
    2843             :         } else {
    2844           0 :                 iter->pos = 1;
    2845           0 :                 key = 0;
    2846             :         }
    2847             : 
    2848           0 :         pos -= iter->pos;
    2849             : 
    2850           0 :         while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) {
    2851           0 :                 key = l->key + 1;
    2852           0 :                 iter->pos++;
    2853           0 :                 l = NULL;
    2854             : 
    2855             :                 /* handle unlikely case of a key wrap */
    2856           0 :                 if (!key)
    2857             :                         break;
    2858             :         }
    2859             : 
    2860           0 :         if (l)
    2861           0 :                 iter->key = l->key;       /* remember it */
    2862             :         else
    2863           0 :                 iter->pos = 0;               /* forget it */
    2864             : 
    2865           0 :         return l;
    2866             : }
    2867             : 
    2868           0 : static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
    2869             :         __acquires(RCU)
    2870             : {
    2871           0 :         struct fib_route_iter *iter = seq->private;
    2872           0 :         struct fib_table *tb;
    2873           0 :         struct trie *t;
    2874             : 
    2875           0 :         rcu_read_lock();
    2876             : 
    2877           0 :         tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
    2878           0 :         if (!tb)
    2879             :                 return NULL;
    2880             : 
    2881           0 :         iter->main_tb = tb;
    2882           0 :         t = (struct trie *)tb->tb_data;
    2883           0 :         iter->tnode = t->kv;
    2884             : 
    2885           0 :         if (*pos != 0)
    2886           0 :                 return fib_route_get_idx(iter, *pos);
    2887             : 
    2888           0 :         iter->pos = 0;
    2889           0 :         iter->key = KEY_MAX;
    2890             : 
    2891           0 :         return SEQ_START_TOKEN;
    2892             : }
    2893             : 
    2894           0 : static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
    2895             : {
    2896           0 :         struct fib_route_iter *iter = seq->private;
    2897           0 :         struct key_vector *l = NULL;
    2898           0 :         t_key key = iter->key + 1;
    2899             : 
    2900           0 :         ++*pos;
    2901             : 
    2902             :         /* only allow key of 0 for start of sequence */
    2903           0 :         if ((v == SEQ_START_TOKEN) || key)
    2904           0 :                 l = leaf_walk_rcu(&iter->tnode, key);
    2905             : 
    2906           0 :         if (l) {
    2907           0 :                 iter->key = l->key;
    2908           0 :                 iter->pos++;
    2909             :         } else {
    2910           0 :                 iter->pos = 0;
    2911             :         }
    2912             : 
    2913           0 :         return l;
    2914             : }
    2915             : 
    2916           0 : static void fib_route_seq_stop(struct seq_file *seq, void *v)
    2917             :         __releases(RCU)
    2918             : {
    2919           0 :         rcu_read_unlock();
    2920           0 : }
    2921             : 
    2922           0 : static unsigned int fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
    2923             : {
    2924           0 :         unsigned int flags = 0;
    2925             : 
    2926           0 :         if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
    2927           0 :                 flags = RTF_REJECT;
    2928           0 :         if (fi) {
    2929           0 :                 const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
    2930             : 
    2931           0 :                 if (nhc->nhc_gw.ipv4)
    2932           0 :                         flags |= RTF_GATEWAY;
    2933             :         }
    2934           0 :         if (mask == htonl(0xFFFFFFFF))
    2935           0 :                 flags |= RTF_HOST;
    2936           0 :         flags |= RTF_UP;
    2937           0 :         return flags;
    2938             : }
    2939             : 
    2940             : /*
    2941             :  *      This outputs /proc/net/route.
    2942             :  *      The format of the file is not supposed to be changed
    2943             :  *      and needs to be same as fib_hash output to avoid breaking
    2944             :  *      legacy utilities
    2945             :  */
    2946           0 : static int fib_route_seq_show(struct seq_file *seq, void *v)
    2947             : {
    2948           0 :         struct fib_route_iter *iter = seq->private;
    2949           0 :         struct fib_table *tb = iter->main_tb;
    2950           0 :         struct fib_alias *fa;
    2951           0 :         struct key_vector *l = v;
    2952           0 :         __be32 prefix;
    2953             : 
    2954           0 :         if (v == SEQ_START_TOKEN) {
    2955           0 :                 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
    2956             :                            "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
    2957             :                            "\tWindow\tIRTT");
    2958           0 :                 return 0;
    2959             :         }
    2960             : 
    2961           0 :         prefix = htonl(l->key);
    2962             : 
    2963           0 :         hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
    2964           0 :                 struct fib_info *fi = fa->fa_info;
    2965           0 :                 __be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen);
    2966           0 :                 unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
    2967             : 
    2968           0 :                 if ((fa->fa_type == RTN_BROADCAST) ||
    2969             :                     (fa->fa_type == RTN_MULTICAST))
    2970           0 :                         continue;
    2971             : 
    2972           0 :                 if (fa->tb_id != tb->tb_id)
    2973           0 :                         continue;
    2974             : 
    2975           0 :                 seq_setwidth(seq, 127);
    2976             : 
    2977           0 :                 if (fi) {
    2978           0 :                         struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
    2979           0 :                         __be32 gw = 0;
    2980             : 
    2981           0 :                         if (nhc->nhc_gw_family == AF_INET)
    2982           0 :                                 gw = nhc->nhc_gw.ipv4;
    2983             : 
    2984           0 :                         seq_printf(seq,
    2985             :                                    "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
    2986             :                                    "%d\t%08X\t%d\t%u\t%u",
    2987           0 :                                    nhc->nhc_dev ? nhc->nhc_dev->name : "*",
    2988             :                                    prefix, gw, flags, 0, 0,
    2989             :                                    fi->fib_priority,
    2990             :                                    mask,
    2991           0 :                                    (fi->fib_advmss ?
    2992             :                                     fi->fib_advmss + 40 : 0),
    2993             :                                    fi->fib_window,
    2994           0 :                                    fi->fib_rtt >> 3);
    2995             :                 } else {
    2996           0 :                         seq_printf(seq,
    2997             :                                    "*\t%08X\t%08X\t%04X\t%d\t%u\t"
    2998             :                                    "%d\t%08X\t%d\t%u\t%u",
    2999             :                                    prefix, 0, flags, 0, 0, 0,
    3000             :                                    mask, 0, 0, 0);
    3001             :                 }
    3002           0 :                 seq_pad(seq, '\n');
    3003             :         }
    3004             : 
    3005             :         return 0;
    3006             : }
    3007             : 
    3008             : static const struct seq_operations fib_route_seq_ops = {
    3009             :         .start  = fib_route_seq_start,
    3010             :         .next   = fib_route_seq_next,
    3011             :         .stop   = fib_route_seq_stop,
    3012             :         .show   = fib_route_seq_show,
    3013             : };
    3014             : 
    3015           1 : int __net_init fib_proc_init(struct net *net)
    3016             : {
    3017           1 :         if (!proc_create_net("fib_trie", 0444, net->proc_net, &fib_trie_seq_ops,
    3018             :                         sizeof(struct fib_trie_iter)))
    3019           0 :                 goto out1;
    3020             : 
    3021           1 :         if (!proc_create_net_single("fib_triestat", 0444, net->proc_net,
    3022             :                         fib_triestat_seq_show, NULL))
    3023           0 :                 goto out2;
    3024             : 
    3025           1 :         if (!proc_create_net("route", 0444, net->proc_net, &fib_route_seq_ops,
    3026             :                         sizeof(struct fib_route_iter)))
    3027           0 :                 goto out3;
    3028             : 
    3029             :         return 0;
    3030             : 
    3031           0 : out3:
    3032           0 :         remove_proc_entry("fib_triestat", net->proc_net);
    3033           0 : out2:
    3034           0 :         remove_proc_entry("fib_trie", net->proc_net);
    3035             : out1:
    3036             :         return -ENOMEM;
    3037             : }
    3038             : 
    3039           0 : void __net_exit fib_proc_exit(struct net *net)
    3040             : {
    3041           0 :         remove_proc_entry("fib_trie", net->proc_net);
    3042           0 :         remove_proc_entry("fib_triestat", net->proc_net);
    3043           0 :         remove_proc_entry("route", net->proc_net);
    3044           0 : }
    3045             : 
    3046             : #endif /* CONFIG_PROC_FS */

Generated by: LCOV version 1.14