LCOV - code coverage report
Current view: top level - net/ipv4 - ip_fragment.c
Test: landlock.info
Date: 2021-04-22 12:43:58
Coverage:   Lines:     39 hit /  349 total  (11.2 %)
            Functions:   4 hit /   21 total  (19.0 %)

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * INET         An implementation of the TCP/IP protocol suite for the LINUX
       4             :  *              operating system.  INET is implemented using the  BSD Socket
       5             :  *              interface as the means of communication with the user level.
       6             :  *
       7             :  *              The IP fragmentation functionality.
       8             :  *
       9             :  * Authors:     Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
      10             :  *              Alan Cox <alan@lxorguk.ukuu.org.uk>
      11             :  *
      12             :  * Fixes:
      13             :  *              Alan Cox        :       Split from ip.c , see ip_input.c for history.
      14             :  *              David S. Miller :       Begin massive cleanup...
      15             :  *              Andi Kleen      :       Add sysctls.
      16             :  *              xxxx            :       Overlapfrag bug.
      17             :  *              Ultima          :       ip_expire() kernel panic.
      18             :  *              Bill Hawes      :       Frag accounting and evictor fixes.
      19             :  *              John McDonald   :       0 length frag bug.
      20             :  *              Alexey Kuznetsov:       SMP races, threading, cleanup.
      21             :  *              Patrick McHardy :       LRU queue of frag heads for evictor.
      22             :  */
      23             : 
      24             : #define pr_fmt(fmt) "IPv4: " fmt
      25             : 
      26             : #include <linux/compiler.h>
      27             : #include <linux/module.h>
      28             : #include <linux/types.h>
      29             : #include <linux/mm.h>
      30             : #include <linux/jiffies.h>
      31             : #include <linux/skbuff.h>
      32             : #include <linux/list.h>
      33             : #include <linux/ip.h>
      34             : #include <linux/icmp.h>
      35             : #include <linux/netdevice.h>
      36             : #include <linux/jhash.h>
      37             : #include <linux/random.h>
      38             : #include <linux/slab.h>
      39             : #include <net/route.h>
      40             : #include <net/dst.h>
      41             : #include <net/sock.h>
      42             : #include <net/ip.h>
      43             : #include <net/icmp.h>
      44             : #include <net/checksum.h>
      45             : #include <net/inetpeer.h>
      46             : #include <net/inet_frag.h>
      47             : #include <linux/tcp.h>
      48             : #include <linux/udp.h>
      49             : #include <linux/inet.h>
      50             : #include <linux/netfilter_ipv4.h>
      51             : #include <net/inet_ecn.h>
      52             : #include <net/l3mdev.h>
      53             : 
      54             : /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
      55             :  * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
      56             :  * as well. Or notify me, at least. --ANK
      57             :  */
      58             : static const char ip_frag_cache_name[] = "ip4-frags";
      59             : 
      60             : /* Describe an entry in the "incomplete datagrams" queue. */
      61             : struct ipq {
      62             :         struct inet_frag_queue q;
      63             : 
      64             :         u8              ecn; /* RFC3168 support */
      65             :         u16             max_df_size; /* largest frag with DF set seen */
      66             :         int             iif;
      67             :         unsigned int    rid;
      68             :         struct inet_peer *peer;
      69             : };
      70             : 
      71           0 : static u8 ip4_frag_ecn(u8 tos)
      72             : {
      73           0 :         return 1 << (tos & INET_ECN_MASK);
      74             : }
      75             : 
      76             : static struct inet_frags ip4_frags;
      77             : 
      78             : static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
      79             :                          struct sk_buff *prev_tail, struct net_device *dev);
      80             : 
      81             : 
      82           0 : static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
      83             : {
      84           0 :         struct ipq *qp = container_of(q, struct ipq, q);
      85           0 :         struct net *net = q->fqdir->net;
      86             : 
      87           0 :         const struct frag_v4_compare_key *key = a;
      88             : 
      89           0 :         q->key.v4 = *key;
      90           0 :         qp->ecn = 0;
      91           0 :         qp->peer = q->fqdir->max_dist ?
      92           0 :                 inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) :
      93             :                 NULL;
      94           0 : }
      95             : 
      96           0 : static void ip4_frag_free(struct inet_frag_queue *q)
      97             : {
      98           0 :         struct ipq *qp;
      99             : 
     100           0 :         qp = container_of(q, struct ipq, q);
     101           0 :         if (qp->peer)
     102           0 :                 inet_putpeer(qp->peer);
     103           0 : }
     104             : 
     105             : 
     106             : /* Destruction primitives. */
     107             : 
     108           0 : static void ipq_put(struct ipq *ipq)
     109             : {
     110           0 :         inet_frag_put(&ipq->q);
     111             : }
     112             : 
     113             : /* Kill ipq entry. It is not destroyed immediately,
     114             :  * because caller (and someone more) holds reference count.
     115             :  */
     116           0 : static void ipq_kill(struct ipq *ipq)
     117             : {
     118           0 :         inet_frag_kill(&ipq->q);
     119             : }
     120             : 
     121           0 : static bool frag_expire_skip_icmp(u32 user)
     122             : {
     123           0 :         return user == IP_DEFRAG_AF_PACKET ||
     124           0 :                ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_IN,
     125           0 :                                          __IP_DEFRAG_CONNTRACK_IN_END) ||
     126           0 :                ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_BRIDGE_IN,
     127             :                                          __IP_DEFRAG_CONNTRACK_BRIDGE_IN);
     128             : }
     129             : 
     130             : /*
     131             :  * Oops, a fragment queue timed out.  Kill it and send an ICMP reply.
     132             :  */
     133           0 : static void ip_expire(struct timer_list *t)
     134             : {
     135           0 :         struct inet_frag_queue *frag = from_timer(frag, t, timer);
     136           0 :         const struct iphdr *iph;
     137           0 :         struct sk_buff *head = NULL;
     138           0 :         struct net *net;
     139           0 :         struct ipq *qp;
     140           0 :         int err;
     141             : 
     142           0 :         qp = container_of(frag, struct ipq, q);
     143           0 :         net = qp->q.fqdir->net;
     144             : 
     145           0 :         rcu_read_lock();
     146             : 
     147           0 :         if (qp->q.fqdir->dead)
     148           0 :                 goto out_rcu_unlock;
     149             : 
     150           0 :         spin_lock(&qp->q.lock);
     151             : 
     152           0 :         if (qp->q.flags & INET_FRAG_COMPLETE)
     153           0 :                 goto out;
     154             : 
     155           0 :         ipq_kill(qp);
     156           0 :         __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
     157           0 :         __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);
     158             : 
     159           0 :         if (!(qp->q.flags & INET_FRAG_FIRST_IN))
     160           0 :                 goto out;
     161             : 
     162             :         /* sk_buff::dev and sk_buff::rbnode are unionized. So we
     163             :          * pull the head out of the tree in order to be able to
     164             :          * deal with head->dev.
     165             :          */
     166           0 :         head = inet_frag_pull_head(&qp->q);
     167           0 :         if (!head)
     168           0 :                 goto out;
     169           0 :         head->dev = dev_get_by_index_rcu(net, qp->iif);
     170           0 :         if (!head->dev)
     171           0 :                 goto out;
     172             : 
     173             : 
     174             :         /* skb has no dst, perform route lookup again */
     175           0 :         iph = ip_hdr(head);
     176           0 :         err = ip_route_input_noref(head, iph->daddr, iph->saddr,
     177           0 :                                            iph->tos, head->dev);
     178           0 :         if (err)
     179           0 :                 goto out;
     180             : 
     181             :         /* Only an end host needs to send an ICMP
     182             :          * "Fragment Reassembly Timeout" message, per RFC792.
     183             :          */
     184           0 :         if (frag_expire_skip_icmp(qp->q.key.v4.user) &&
     185           0 :             (skb_rtable(head)->rt_type != RTN_LOCAL))
     186           0 :                 goto out;
     187             : 
     188           0 :         spin_unlock(&qp->q.lock);
     189           0 :         icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
     190           0 :         goto out_rcu_unlock;
     191             : 
     192           0 : out:
     193           0 :         spin_unlock(&qp->q.lock);
     194           0 : out_rcu_unlock:
     195           0 :         rcu_read_unlock();
     196           0 :         kfree_skb(head);
     197           0 :         ipq_put(qp);
     198           0 : }
     199             : 
     200             : /* Find the correct entry in the "incomplete datagrams" queue for
     201             :  * this IP datagram, and create new one, if nothing is found.
     202             :  */
     203           0 : static struct ipq *ip_find(struct net *net, struct iphdr *iph,
     204             :                            u32 user, int vif)
     205             : {
     206           0 :         struct frag_v4_compare_key key = {
     207           0 :                 .saddr = iph->saddr,
     208           0 :                 .daddr = iph->daddr,
     209             :                 .user = user,
     210             :                 .vif = vif,
     211           0 :                 .id = iph->id,
     212           0 :                 .protocol = iph->protocol,
     213             :         };
     214           0 :         struct inet_frag_queue *q;
     215             : 
     216           0 :         q = inet_frag_find(net->ipv4.fqdir, &key);
     217           0 :         if (!q)
     218           0 :                 return NULL;
     219             : 
     220           0 :         return container_of(q, struct ipq, q);
     221             : }
     222             : 
     223             : /* Is the fragment too far ahead to be part of ipq? */
     224           0 : static int ip_frag_too_far(struct ipq *qp)
     225             : {
     226           0 :         struct inet_peer *peer = qp->peer;
     227           0 :         unsigned int max = qp->q.fqdir->max_dist;
     228           0 :         unsigned int start, end;
     229             : 
     230           0 :         int rc;
     231             : 
     232           0 :         if (!peer || !max)
     233             :                 return 0;
     234             : 
     235           0 :         start = qp->rid;
     236           0 :         end = atomic_inc_return(&peer->rid);
     237           0 :         qp->rid = end;
     238             : 
     239           0 :         rc = qp->q.fragments_tail && (end - start) > max;
     240             : 
     241           0 :         if (rc)
     242           0 :                 __IP_INC_STATS(qp->q.fqdir->net, IPSTATS_MIB_REASMFAILS);
     243             : 
     244             :         return rc;
     245             : }
     246             : 
     247           0 : static int ip_frag_reinit(struct ipq *qp)
     248             : {
     249           0 :         unsigned int sum_truesize = 0;
     250             : 
     251           0 :         if (!mod_timer(&qp->q.timer, jiffies + qp->q.fqdir->timeout)) {
     252           0 :                 refcount_inc(&qp->q.refcnt);
     253           0 :                 return -ETIMEDOUT;
     254             :         }
     255             : 
     256           0 :         sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments);
     257           0 :         sub_frag_mem_limit(qp->q.fqdir, sum_truesize);
     258             : 
     259           0 :         qp->q.flags = 0;
     260           0 :         qp->q.len = 0;
     261           0 :         qp->q.meat = 0;
     262           0 :         qp->q.rb_fragments = RB_ROOT;
     263           0 :         qp->q.fragments_tail = NULL;
     264           0 :         qp->q.last_run_head = NULL;
     265           0 :         qp->iif = 0;
     266           0 :         qp->ecn = 0;
     267             : 
     268           0 :         return 0;
     269             : }
     270             : 
     271             : /* Add new segment to existing queue. */
     272           0 : static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
     273             : {
     274           0 :         struct net *net = qp->q.fqdir->net;
     275           0 :         int ihl, end, flags, offset;
     276           0 :         struct sk_buff *prev_tail;
     277           0 :         struct net_device *dev;
     278           0 :         unsigned int fragsize;
     279           0 :         int err = -ENOENT;
     280           0 :         u8 ecn;
     281             : 
     282           0 :         if (qp->q.flags & INET_FRAG_COMPLETE)
     283           0 :                 goto err;
     284             : 
     285           0 :         if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
     286           0 :             unlikely(ip_frag_too_far(qp)) &&
     287           0 :             unlikely(err = ip_frag_reinit(qp))) {
     288           0 :                 ipq_kill(qp);
     289           0 :                 goto err;
     290             :         }
     291             : 
     292           0 :         ecn = ip4_frag_ecn(ip_hdr(skb)->tos);
     293           0 :         offset = ntohs(ip_hdr(skb)->frag_off);
     294           0 :         flags = offset & ~IP_OFFSET;
     295           0 :         offset &= IP_OFFSET;
     296           0 :         offset <<= 3;             /* offset is in 8-byte chunks */
     297           0 :         ihl = ip_hdrlen(skb);
     298             : 
     299             :         /* Determine the position of this fragment. */
     300           0 :         end = offset + skb->len - skb_network_offset(skb) - ihl;
     301           0 :         err = -EINVAL;
     302             : 
     303             :         /* Is this the final fragment? */
     304           0 :         if ((flags & IP_MF) == 0) {
     305             :                 /* If we already have some bits beyond end
     306             :                  * or have different end, the segment is corrupted.
     307             :                  */
     308           0 :                 if (end < qp->q.len ||
     309           0 :                     ((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len))
     310           0 :                         goto discard_qp;
     311           0 :                 qp->q.flags |= INET_FRAG_LAST_IN;
     312           0 :                 qp->q.len = end;
     313             :         } else {
     314           0 :                 if (end&7) {
     315           0 :                         end &= ~7;
     316           0 :                         if (skb->ip_summed != CHECKSUM_UNNECESSARY)
     317           0 :                                 skb->ip_summed = CHECKSUM_NONE;
     318             :                 }
     319           0 :                 if (end > qp->q.len) {
     320             :                         /* Some bits beyond end -> corruption. */
     321           0 :                         if (qp->q.flags & INET_FRAG_LAST_IN)
     322           0 :                                 goto discard_qp;
     323           0 :                         qp->q.len = end;
     324             :                 }
     325             :         }
     326           0 :         if (end == offset)
     327           0 :                 goto discard_qp;
     328             : 
     329           0 :         err = -ENOMEM;
     330           0 :         if (!pskb_pull(skb, skb_network_offset(skb) + ihl))
     331           0 :                 goto discard_qp;
     332             : 
     333           0 :         err = pskb_trim_rcsum(skb, end - offset);
     334           0 :         if (err)
     335           0 :                 goto discard_qp;
     336             : 
     337             :         /* Note : skb->rbnode and skb->dev share the same location. */
     338           0 :         dev = skb->dev;
      339             :         /* Make sure the compiler won't do silly aliasing games */
     340           0 :         barrier();
     341             : 
     342           0 :         prev_tail = qp->q.fragments_tail;
     343           0 :         err = inet_frag_queue_insert(&qp->q, skb, offset, end);
     344           0 :         if (err)
     345           0 :                 goto insert_error;
     346             : 
     347           0 :         if (dev)
     348           0 :                 qp->iif = dev->ifindex;
     349             : 
     350           0 :         qp->q.stamp = skb->tstamp;
     351           0 :         qp->q.meat += skb->len;
     352           0 :         qp->ecn |= ecn;
     353           0 :         add_frag_mem_limit(qp->q.fqdir, skb->truesize);
     354           0 :         if (offset == 0)
     355           0 :                 qp->q.flags |= INET_FRAG_FIRST_IN;
     356             : 
     357           0 :         fragsize = skb->len + ihl;
     358             : 
     359           0 :         if (fragsize > qp->q.max_size)
     360           0 :                 qp->q.max_size = fragsize;
     361             : 
     362           0 :         if (ip_hdr(skb)->frag_off & htons(IP_DF) &&
     363           0 :             fragsize > qp->max_df_size)
     364           0 :                 qp->max_df_size = fragsize;
     365             : 
     366           0 :         if (qp->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
     367           0 :             qp->q.meat == qp->q.len) {
     368           0 :                 unsigned long orefdst = skb->_skb_refdst;
     369             : 
     370           0 :                 skb->_skb_refdst = 0UL;
     371           0 :                 err = ip_frag_reasm(qp, skb, prev_tail, dev);
     372           0 :                 skb->_skb_refdst = orefdst;
     373           0 :                 if (err)
     374           0 :                         inet_frag_kill(&qp->q);
     375           0 :                 return err;
     376             :         }
     377             : 
     378           0 :         skb_dst_drop(skb);
     379             :         return -EINPROGRESS;
     380             : 
     381           0 : insert_error:
     382           0 :         if (err == IPFRAG_DUP) {
     383           0 :                 kfree_skb(skb);
     384           0 :                 return -EINVAL;
     385             :         }
     386           0 :         err = -EINVAL;
     387           0 :         __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
     388           0 : discard_qp:
     389           0 :         inet_frag_kill(&qp->q);
     390           0 :         __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
     391           0 : err:
     392           0 :         kfree_skb(skb);
     393           0 :         return err;
     394             : }
     395             : 
     396           0 : static bool ip_frag_coalesce_ok(const struct ipq *qp)
     397             : {
     398           0 :         return qp->q.key.v4.user == IP_DEFRAG_LOCAL_DELIVER;
     399             : }
     400             : 
     401             : /* Build a new IP datagram from all its fragments. */
     402           0 : static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
     403             :                          struct sk_buff *prev_tail, struct net_device *dev)
     404             : {
     405           0 :         struct net *net = qp->q.fqdir->net;
     406           0 :         struct iphdr *iph;
     407           0 :         void *reasm_data;
     408           0 :         int len, err;
     409           0 :         u8 ecn;
     410             : 
     411           0 :         ipq_kill(qp);
     412             : 
     413           0 :         ecn = ip_frag_ecn_table[qp->ecn];
     414           0 :         if (unlikely(ecn == 0xff)) {
     415           0 :                 err = -EINVAL;
     416           0 :                 goto out_fail;
     417             :         }
     418             : 
     419             :         /* Make the one we just received the head. */
     420           0 :         reasm_data = inet_frag_reasm_prepare(&qp->q, skb, prev_tail);
     421           0 :         if (!reasm_data)
     422           0 :                 goto out_nomem;
     423             : 
     424           0 :         len = ip_hdrlen(skb) + qp->q.len;
     425           0 :         err = -E2BIG;
     426           0 :         if (len > 65535)
     427           0 :                 goto out_oversize;
     428             : 
     429           0 :         inet_frag_reasm_finish(&qp->q, skb, reasm_data,
     430           0 :                                ip_frag_coalesce_ok(qp));
     431             : 
     432           0 :         skb->dev = dev;
     433           0 :         IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
     434             : 
     435           0 :         iph = ip_hdr(skb);
     436           0 :         iph->tot_len = htons(len);
     437           0 :         iph->tos |= ecn;
     438             : 
     439             :         /* When we set IP_DF on a refragmented skb we must also force a
     440             :          * call to ip_fragment to avoid forwarding a DF-skb of size s while
     441             :          * original sender only sent fragments of size f (where f < s).
     442             :          *
     443             :          * We only set DF/IPSKB_FRAG_PMTU if such DF fragment was the largest
     444             :          * frag seen to avoid sending tiny DF-fragments in case skb was built
     445             :          * from one very small df-fragment and one large non-df frag.
     446             :          */
     447           0 :         if (qp->max_df_size == qp->q.max_size) {
     448           0 :                 IPCB(skb)->flags |= IPSKB_FRAG_PMTU;
     449           0 :                 iph->frag_off = htons(IP_DF);
     450             :         } else {
     451           0 :                 iph->frag_off = 0;
     452             :         }
     453             : 
     454           0 :         ip_send_check(iph);
     455             : 
     456           0 :         __IP_INC_STATS(net, IPSTATS_MIB_REASMOKS);
     457           0 :         qp->q.rb_fragments = RB_ROOT;
     458           0 :         qp->q.fragments_tail = NULL;
     459           0 :         qp->q.last_run_head = NULL;
     460           0 :         return 0;
     461             : 
     462           0 : out_nomem:
     463           0 :         net_dbg_ratelimited("queue_glue: no memory for gluing queue %p\n", qp);
     464           0 :         err = -ENOMEM;
     465           0 :         goto out_fail;
     466           0 : out_oversize:
     467           0 :         net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->q.key.v4.saddr);
     468           0 : out_fail:
     469           0 :         __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
     470           0 :         return err;
     471             : }
     472             : 
     473             : /* Process an incoming IP datagram fragment. */
     474           0 : int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
     475             : {
     476           0 :         struct net_device *dev = skb->dev ? : skb_dst(skb)->dev;
     477           0 :         int vif = l3mdev_master_ifindex_rcu(dev);
     478           0 :         struct ipq *qp;
     479             : 
     480           0 :         __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);
     481           0 :         skb_orphan(skb);
     482             : 
     483             :         /* Lookup (or create) queue header */
     484           0 :         qp = ip_find(net, ip_hdr(skb), user, vif);
     485           0 :         if (qp) {
     486           0 :                 int ret;
     487             : 
     488           0 :                 spin_lock(&qp->q.lock);
     489             : 
     490           0 :                 ret = ip_frag_queue(qp, skb);
     491             : 
     492           0 :                 spin_unlock(&qp->q.lock);
     493           0 :                 ipq_put(qp);
     494           0 :                 return ret;
     495             :         }
     496             : 
     497           0 :         __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
     498           0 :         kfree_skb(skb);
     499           0 :         return -ENOMEM;
     500             : }
     501             : EXPORT_SYMBOL(ip_defrag);
     502             : 
     503           0 : struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
     504             : {
     505           0 :         struct iphdr iph;
     506           0 :         int netoff;
     507           0 :         u32 len;
     508             : 
     509           0 :         if (skb->protocol != htons(ETH_P_IP))
     510             :                 return skb;
     511             : 
     512           0 :         netoff = skb_network_offset(skb);
     513             : 
     514           0 :         if (skb_copy_bits(skb, netoff, &iph, sizeof(iph)) < 0)
     515             :                 return skb;
     516             : 
     517           0 :         if (iph.ihl < 5 || iph.version != 4)
     518             :                 return skb;
     519             : 
     520           0 :         len = ntohs(iph.tot_len);
     521           0 :         if (skb->len < netoff + len || len < (iph.ihl * 4))
     522             :                 return skb;
     523             : 
     524           0 :         if (ip_is_fragment(&iph)) {
     525           0 :                 skb = skb_share_check(skb, GFP_ATOMIC);
     526           0 :                 if (skb) {
     527           0 :                         if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) {
     528           0 :                                 kfree_skb(skb);
     529           0 :                                 return NULL;
     530             :                         }
     531           0 :                         if (pskb_trim_rcsum(skb, netoff + len)) {
     532           0 :                                 kfree_skb(skb);
     533           0 :                                 return NULL;
     534             :                         }
     535           0 :                         memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
     536           0 :                         if (ip_defrag(net, skb, user))
     537             :                                 return NULL;
     538           0 :                         skb_clear_hash(skb);
     539             :                 }
     540             :         }
     541             :         return skb;
     542             : }
     543             : EXPORT_SYMBOL(ip_check_defrag);
     544             : 
     545             : #ifdef CONFIG_SYSCTL
     546             : static int dist_min;
     547             : 
     548             : static struct ctl_table ip4_frags_ns_ctl_table[] = {
     549             :         {
     550             :                 .procname       = "ipfrag_high_thresh",
     551             :                 .maxlen         = sizeof(unsigned long),
     552             :                 .mode           = 0644,
     553             :                 .proc_handler   = proc_doulongvec_minmax,
     554             :         },
     555             :         {
     556             :                 .procname       = "ipfrag_low_thresh",
     557             :                 .maxlen         = sizeof(unsigned long),
     558             :                 .mode           = 0644,
     559             :                 .proc_handler   = proc_doulongvec_minmax,
     560             :         },
     561             :         {
     562             :                 .procname       = "ipfrag_time",
     563             :                 .maxlen         = sizeof(int),
     564             :                 .mode           = 0644,
     565             :                 .proc_handler   = proc_dointvec_jiffies,
     566             :         },
     567             :         {
     568             :                 .procname       = "ipfrag_max_dist",
     569             :                 .maxlen         = sizeof(int),
     570             :                 .mode           = 0644,
     571             :                 .proc_handler   = proc_dointvec_minmax,
     572             :                 .extra1         = &dist_min,
     573             :         },
     574             :         { }
     575             : };
     576             : 
     577             : /* secret interval has been deprecated */
     578             : static int ip4_frags_secret_interval_unused;
     579             : static struct ctl_table ip4_frags_ctl_table[] = {
     580             :         {
     581             :                 .procname       = "ipfrag_secret_interval",
     582             :                 .data           = &ip4_frags_secret_interval_unused,
     583             :                 .maxlen         = sizeof(int),
     584             :                 .mode           = 0644,
     585             :                 .proc_handler   = proc_dointvec_jiffies,
     586             :         },
     587             :         { }
     588             : };
     589             : 
     590           1 : static int __net_init ip4_frags_ns_ctl_register(struct net *net)
     591             : {
     592           1 :         struct ctl_table *table;
     593           1 :         struct ctl_table_header *hdr;
     594             : 
     595           1 :         table = ip4_frags_ns_ctl_table;
     596           1 :         if (!net_eq(net, &init_net)) {
     597             :                 table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL);
     598             :                 if (!table)
     599             :                         goto err_alloc;
     600             : 
     601             :         }
     602           1 :         table[0].data   = &net->ipv4.fqdir->high_thresh;
     603           1 :         table[0].extra1 = &net->ipv4.fqdir->low_thresh;
     604           1 :         table[1].data   = &net->ipv4.fqdir->low_thresh;
     605           1 :         table[1].extra2 = &net->ipv4.fqdir->high_thresh;
     606           1 :         table[2].data   = &net->ipv4.fqdir->timeout;
     607           1 :         table[3].data   = &net->ipv4.fqdir->max_dist;
     608             : 
     609           1 :         hdr = register_net_sysctl(net, "net/ipv4", table);
     610           1 :         if (!hdr)
     611           0 :                 goto err_reg;
     612             : 
     613           1 :         net->ipv4.frags_hdr = hdr;
     614           1 :         return 0;
     615             : 
     616           0 : err_reg:
     617           0 :         if (!net_eq(net, &init_net))
     618             :                 kfree(table);
     619           0 : err_alloc:
     620           0 :         return -ENOMEM;
     621             : }
     622             : 
     623           0 : static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net)
     624             : {
     625           0 :         struct ctl_table *table;
     626             : 
     627           0 :         table = net->ipv4.frags_hdr->ctl_table_arg;
     628           0 :         unregister_net_sysctl_table(net->ipv4.frags_hdr);
     629           0 :         kfree(table);
     630           0 : }
     631             : 
     632           1 : static void __init ip4_frags_ctl_register(void)
     633             : {
     634           1 :         register_net_sysctl(&init_net, "net/ipv4", ip4_frags_ctl_table);
     635           1 : }
     636             : #else
     637             : static int ip4_frags_ns_ctl_register(struct net *net)
     638             : {
     639             :         return 0;
     640             : }
     641             : 
     642             : static void ip4_frags_ns_ctl_unregister(struct net *net)
     643             : {
     644             : }
     645             : 
     646             : static void __init ip4_frags_ctl_register(void)
     647             : {
     648             : }
     649             : #endif
     650             : 
     651           1 : static int __net_init ipv4_frags_init_net(struct net *net)
     652             : {
     653           1 :         int res;
     654             : 
     655           1 :         res = fqdir_init(&net->ipv4.fqdir, &ip4_frags, net);
     656           1 :         if (res < 0)
     657             :                 return res;
     658             :         /* Fragment cache limits.
     659             :          *
     660             :          * The fragment memory accounting code, (tries to) account for
     661             :          * the real memory usage, by measuring both the size of frag
     662             :          * queue struct (inet_frag_queue (ipv4:ipq/ipv6:frag_queue))
     663             :          * and the SKB's truesize.
     664             :          *
     665             :          * A 64K fragment consumes 129736 bytes (44*2944)+200
     666             :          * (1500 truesize == 2944, sizeof(struct ipq) == 200)
     667             :          *
     668             :          * We will commit 4MB at one time. Should we cross that limit
     669             :          * we will prune down to 3MB, making room for approx 8 big 64K
     670             :          * fragments 8x128k.
     671             :          */
     672           1 :         net->ipv4.fqdir->high_thresh = 4 * 1024 * 1024;
     673           1 :         net->ipv4.fqdir->low_thresh  = 3 * 1024 * 1024;
     674             :         /*
     675             :          * Important NOTE! Fragment queue must be destroyed before MSL expires.
      676             :          * RFC791 is wrong in proposing to prolong the timer by the TTL on
      677             :          * each fragment arrival.
     678             :          */
     679           1 :         net->ipv4.fqdir->timeout = IP_FRAG_TIME;
     680             : 
     681           1 :         net->ipv4.fqdir->max_dist = 64;
     682             : 
     683           1 :         res = ip4_frags_ns_ctl_register(net);
     684           1 :         if (res < 0)
     685           0 :                 fqdir_exit(net->ipv4.fqdir);
     686             :         return res;
     687             : }
     688             : 
     689           0 : static void __net_exit ipv4_frags_pre_exit_net(struct net *net)
     690             : {
     691           0 :         fqdir_pre_exit(net->ipv4.fqdir);
     692           0 : }
     693             : 
     694           0 : static void __net_exit ipv4_frags_exit_net(struct net *net)
     695             : {
     696           0 :         ip4_frags_ns_ctl_unregister(net);
     697           0 :         fqdir_exit(net->ipv4.fqdir);
     698           0 : }
     699             : 
     700             : static struct pernet_operations ip4_frags_ops = {
     701             :         .init           = ipv4_frags_init_net,
     702             :         .pre_exit       = ipv4_frags_pre_exit_net,
     703             :         .exit           = ipv4_frags_exit_net,
     704             : };
     705             : 
     706             : 
     707           0 : static u32 ip4_key_hashfn(const void *data, u32 len, u32 seed)
     708             : {
     709           0 :         return jhash2(data,
     710             :                       sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
     711             : }
     712             : 
     713           0 : static u32 ip4_obj_hashfn(const void *data, u32 len, u32 seed)
     714             : {
     715           0 :         const struct inet_frag_queue *fq = data;
     716             : 
     717           0 :         return jhash2((const u32 *)&fq->key.v4,
     718             :                       sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
     719             : }
     720             : 
     721           0 : static int ip4_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
     722             : {
     723           0 :         const struct frag_v4_compare_key *key = arg->key;
     724           0 :         const struct inet_frag_queue *fq = ptr;
     725             : 
     726           0 :         return !!memcmp(&fq->key, key, sizeof(*key));
     727             : }
     728             : 
     729             : static const struct rhashtable_params ip4_rhash_params = {
     730             :         .head_offset            = offsetof(struct inet_frag_queue, node),
     731             :         .key_offset             = offsetof(struct inet_frag_queue, key),
     732             :         .key_len                = sizeof(struct frag_v4_compare_key),
     733             :         .hashfn                 = ip4_key_hashfn,
     734             :         .obj_hashfn             = ip4_obj_hashfn,
     735             :         .obj_cmpfn              = ip4_obj_cmpfn,
     736             :         .automatic_shrinking    = true,
     737             : };
     738             : 
     739           1 : void __init ipfrag_init(void)
     740             : {
     741           1 :         ip4_frags.constructor = ip4_frag_init;
     742           1 :         ip4_frags.destructor = ip4_frag_free;
     743           1 :         ip4_frags.qsize = sizeof(struct ipq);
     744           1 :         ip4_frags.frag_expire = ip_expire;
     745           1 :         ip4_frags.frags_cache_name = ip_frag_cache_name;
     746           1 :         ip4_frags.rhash_params = ip4_rhash_params;
     747           1 :         if (inet_frags_init(&ip4_frags))
     748           0 :                 panic("IP: failed to allocate ip4_frags cache\n");
     749           1 :         ip4_frags_ctl_register();
     750           1 :         register_pernet_subsys(&ip4_frags_ops);
     751           1 : }

Generated by: LCOV version 1.14