LCOV - code coverage report
File: net/ipv4/tcp_recovery.c
Test: landlock.info, Date: 2021-04-22 12:43:58
Coverage: 8 of 90 lines hit (8.9 %), 1 of 8 functions hit (12.5 %); only tcp_rack_update_reo_wnd() was exercised (351 hits).

// SPDX-License-Identifier: GPL-2.0
#include <linux/tcp.h>
#include <net/tcp.h>

static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
{
	return t1 > t2 || (t1 == t2 && after(seq1, seq2));
}
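
/* Illustrative sketch, not part of tcp_recovery.c: how the ordering helper
 * above breaks ties. Timestamps are compared first; the end sequence numbers
 * only decide between packets stamped in the same microsecond (e.g. segments
 * of a single TSO burst). All values here are made up.
 */
static inline void tcp_rack_sent_after_example(void)
{
	bool later = tcp_rack_sent_after(2000, 1000, 100, 200);	/* true: t1 > t2 */
	bool tied = tcp_rack_sent_after(1000, 1000, 200, 100);		/* true: equal stamps, higher end_seq */
	bool not_after = tcp_rack_sent_after(1000, 1000, 100, 200);	/* false */

	WARN_ON(!later || !tied || not_after);
}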

static u32 tcp_rack_reo_wnd(const struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (!tp->reord_seen) {
		/* If reordering has not been observed, be aggressive during
		 * recovery, or when starting recovery via the DUPACK threshold.
		 */
		if (inet_csk(sk)->icsk_ca_state >= TCP_CA_Recovery)
			return 0;

		if (tp->sacked_out >= tp->reordering &&
		    !(sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_NO_DUPTHRESH))
			return 0;
	}

	/* To be more reordering resilient, allow min_rtt/4 settling delay.
	 * Use min_rtt instead of the smoothed RTT because reordering is
	 * often a path property and less related to queuing or delayed ACKs.
	 * Upon receiving DSACKs, linearly increase the window up to the
	 * smoothed RTT.
	 */
	return min((tcp_min_rtt(tp) >> 2) * tp->rack.reo_wnd_steps,
		   tp->srtt_us >> 3);
}
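
/* Illustrative sketch, not part of tcp_recovery.c: the window math above with
 * concrete, made-up numbers. With min_rtt = 40 ms, srtt = 50 ms and the
 * default reo_wnd_steps = 1, the settling delay is min(10 ms * 1, 50 ms) =
 * 10 ms; after three DSACK-driven increments (steps = 4) it saturates at
 * min(40 ms, 50 ms) = 40 ms.
 */
static inline u32 tcp_rack_reo_wnd_example(void)
{
	u32 min_rtt_us = 40000;	/* hypothetical path RTT floor, 40 ms */
	u32 srtt_us = 50000;	/* hypothetical smoothed RTT, 50 ms */
	u32 steps = 4;		/* as if three DSACKs bumped reo_wnd_steps */

	return min((min_rtt_us >> 2) * steps, srtt_us);	/* 40000 us */
}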

s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb, u32 reo_wnd)
{
	return tp->rack.rtt_us + reo_wnd -
	       tcp_stamp_us_delta(tp->tcp_mstamp, tcp_skb_timestamp_us(skb));
}
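
/* Illustrative sketch, not part of tcp_recovery.c: the sign convention of the
 * timeout above. With rack.rtt_us = 40 ms and reo_wnd = 10 ms, a packet sent
 * 30 ms ago still has 20 ms of budget left, while a packet sent 60 ms ago is
 * 10 ms overdue and would be marked lost. Numbers are hypothetical.
 */
static inline void tcp_rack_skb_timeout_example(void)
{
	s32 budget_us = 40000 + 10000;		/* rack.rtt_us + reo_wnd */

	WARN_ON(budget_us - 30000 <= 0);	/* 20 ms remaining: keep waiting */
	WARN_ON(budget_us - 60000 > 0);		/* -10 ms: overdue, mark lost */
}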

/* RACK loss detection (IETF draft draft-ietf-tcpm-rack-01):
 *
 * Marks a packet lost if some packet sent later has been (s)acked.
 * The underlying idea is similar to the traditional dupthresh and FACK,
 * but they look at different metrics:
 *
 * dupthresh: 3 OOO packets delivered (packet count)
 * FACK: sequence delta to highest sacked sequence (sequence space)
 * RACK: sent time delta to the latest delivered packet (time domain)
 *
 * The advantage of RACK is that it applies to both original and
 * retransmitted packets and is therefore robust against tail losses.
 * Another advantage is being more resilient to reordering by simply
 * allowing some "settling delay", instead of tweaking the dupthresh.
 *
 * When tcp_rack_detect_loss() detects some packets are lost and we
 * are not already in the CA_Recovery state, either tcp_rack_reo_timeout()
 * or tcp_time_to_recover()'s "Trick#1: the loss is proven" code path will
 * make us enter the CA_Recovery state.
 */
static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb, *n;
	u32 reo_wnd;

	*reo_timeout = 0;
	reo_wnd = tcp_rack_reo_wnd(sk);
	list_for_each_entry_safe(skb, n, &tp->tsorted_sent_queue,
				 tcp_tsorted_anchor) {
		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
		s32 remaining;

		/* Skip ones marked lost but not yet retransmitted */
		if ((scb->sacked & TCPCB_LOST) &&
		    !(scb->sacked & TCPCB_SACKED_RETRANS))
			continue;

		if (!tcp_rack_sent_after(tp->rack.mstamp,
					 tcp_skb_timestamp_us(skb),
					 tp->rack.end_seq, scb->end_seq))
			break;

		/* A packet is lost if it has not been s/acked beyond
		 * the recent RTT plus the reordering window.
		 */
		remaining = tcp_rack_skb_timeout(tp, skb, reo_wnd);
		if (remaining <= 0) {
			tcp_mark_skb_lost(sk, skb);
			list_del_init(&skb->tcp_tsorted_anchor);
		} else {
			/* Record maximum wait time */
			*reo_timeout = max_t(u32, *reo_timeout, remaining);
		}
	}
}
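
/* Illustrative sketch, not part of tcp_recovery.c: the core RACK rule that
 * tcp_rack_detect_loss() applies per skb, reduced to a single predicate over
 * plain numbers (the sequence-number tie-break is omitted). An unacked packet
 * is declared lost once (a) something sent after it has been (s)acked and
 * (b) its elapsed time exceeds the most recent RTT plus the reordering
 * window. All parameter names are made up for illustration.
 */
static inline bool rack_example_is_lost(u64 rack_xmit_stamp, u64 skb_xmit_stamp,
					u32 elapsed_us, u32 rack_rtt_us,
					u32 reo_wnd_us)
{
	/* (a) something sent after this skb was already (s)acked */
	if (rack_xmit_stamp <= skb_xmit_stamp)
		return false;
	/* (b) the recent RTT plus the settling delay has expired */
	return elapsed_us >= rack_rtt_us + reo_wnd_us;
}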

bool tcp_rack_mark_lost(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 timeout;

	if (!tp->rack.advanced)
		return false;

	/* Reset the advanced flag to avoid unnecessary queue scanning */
	tp->rack.advanced = 0;
	tcp_rack_detect_loss(sk, &timeout);
	if (timeout) {
		timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN;
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
					  timeout, inet_csk(sk)->icsk_rto);
	}
	return !!timeout;
}
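
/* Illustrative sketch, not part of tcp_recovery.c: the timer arithmetic used
 * above when the reordering timer is armed. Assuming HZ = 1000, a remaining
 * wait of 2500 us rounds up to 3 jiffies, and TCP_TIMEOUT_MIN is then added
 * so the timer never fires immediately. The helper name is made up.
 */
static inline unsigned long rack_example_reo_timer(u32 remaining_us)
{
	return usecs_to_jiffies(remaining_us) + TCP_TIMEOUT_MIN;
}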

/* Record the most recently (re)sent time among the (s)acked packets.
 * This is "Step 3: Advance RACK.xmit_time and update RACK.RTT" from
 * draft-cheng-tcpm-rack-00.txt
 */
void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
		      u64 xmit_time)
{
	u32 rtt_us;

	rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, xmit_time);
	if (rtt_us < tcp_min_rtt(tp) && (sacked & TCPCB_RETRANS)) {
		/* If the sacked packet was retransmitted, it's ambiguous
		 * whether the retransmission or the original (or the prior
		 * retransmission) was sacked.
		 *
		 * If the original is lost, there is no ambiguity. Otherwise
		 * we assume the original can be delayed up to aRTT + min_rtt.
		 * The aRTT term is bounded by the fast recovery or timeout,
		 * so it's at least one RTT (i.e., the retransmission is at
		 * least an RTT later).
		 */
		return;
	}
	tp->rack.advanced = 1;
	tp->rack.rtt_us = rtt_us;
	if (tcp_rack_sent_after(xmit_time, tp->rack.mstamp,
				end_seq, tp->rack.end_seq)) {
		tp->rack.mstamp = xmit_time;
		tp->rack.end_seq = end_seq;
	}
}
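
/* Illustrative sketch, not part of tcp_recovery.c: the ambiguity filter above
 * with made-up numbers. If min_rtt is 40 ms and a retransmitted segment is
 * (s)acked only 5 ms after the retransmission, the ACK almost certainly
 * matches the original transmission, so the 5 ms sample must not be used to
 * advance RACK state. E.g. rack_example_sample_is_ambiguous(5000, 40000,
 * TCPCB_RETRANS) is true.
 */
static inline bool rack_example_sample_is_ambiguous(u32 rtt_sample_us,
						    u32 min_rtt_us, u8 sacked)
{
	return rtt_sample_us < min_rtt_us && (sacked & TCPCB_RETRANS);
}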

/* We have waited long enough to accommodate reordering. Mark the expired
 * packets lost and retransmit them.
 */
void tcp_rack_reo_timeout(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 timeout, prior_inflight;
	u32 lost = tp->lost;

	prior_inflight = tcp_packets_in_flight(tp);
	tcp_rack_detect_loss(sk, &timeout);
	if (prior_inflight != tcp_packets_in_flight(tp)) {
		if (inet_csk(sk)->icsk_ca_state != TCP_CA_Recovery) {
			tcp_enter_recovery(sk, false);
			if (!inet_csk(sk)->icsk_ca_ops->cong_control)
				tcp_cwnd_reduction(sk, 1, tp->lost - lost, 0);
		}
		tcp_xmit_retransmit_queue(sk);
	}
	if (inet_csk(sk)->icsk_pending != ICSK_TIME_RETRANS)
		tcp_rearm_rto(sk);
}

/* Updates the RACK reo_wnd based on DSACKs and the number of recoveries.
 *
 * If a DSACK is received, increment reo_wnd by min_rtt/4 (upper bounded
 * by srtt), since there is a possibility that a spurious retransmission
 * was due to a reordering delay longer than reo_wnd.
 *
 * Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16)
 * successful recoveries (accounts for full DSACK-based loss recovery
 * undo). After that, reset it to the default (min_rtt/4).
 *
 * reo_wnd is incremented at most once per RTT, so that the DSACK we are
 * reacting to was (approximately) caused by a spurious retransmission
 * sent after reo_wnd was last updated.
 *
 * reo_wnd is tracked in steps (of min_rtt/4), rather than as an
 * absolute value, to account for changes in the RTT.
 */
void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND ||
	    !rs->prior_delivered)
		return;

	/* Disregard the DSACK if an RTT has not passed since we adjusted reo_wnd */
	if (before(rs->prior_delivered, tp->rack.last_delivered))
		tp->rack.dsack_seen = 0;

	/* Adjust the reo_wnd if an update is pending */
	if (tp->rack.dsack_seen) {
		tp->rack.reo_wnd_steps = min_t(u32, 0xFF,
					       tp->rack.reo_wnd_steps + 1);
		tp->rack.dsack_seen = 0;
		tp->rack.last_delivered = tp->delivered;
		tp->rack.reo_wnd_persist = TCP_RACK_RECOVERY_THRESH;
	} else if (!tp->rack.reo_wnd_persist) {
		tp->rack.reo_wnd_steps = 1;
	}
}
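
/* Illustrative sketch, not part of tcp_recovery.c: the reo_wnd state update
 * above, isolated from the socket plumbing. A DSACK (at most one per round,
 * per the gate above) adds one step, where each step is min_rtt/4 in
 * tcp_rack_reo_wnd(); the step count is capped at 0xFF and the inflated
 * window persists for TCP_RACK_RECOVERY_THRESH recoveries before dropping
 * back to the single-step default. The struct below is made up.
 */
struct rack_example_reo_state {
	u8 reo_wnd_steps;	/* multiples of min_rtt/4 */
	u8 reo_wnd_persist;	/* recoveries left before decaying to 1 step */
};

static inline void rack_example_reo_update(struct rack_example_reo_state *st,
					   bool dsack_seen)
{
	if (dsack_seen) {
		/* a spurious retransmission suggests reo_wnd was too small */
		st->reo_wnd_steps = min_t(u32, 0xFF, st->reo_wnd_steps + 1);
		st->reo_wnd_persist = TCP_RACK_RECOVERY_THRESH;
	} else if (!st->reo_wnd_persist) {
		/* no recent DSACK and the persist budget ran out */
		st->reo_wnd_steps = 1;
	}
}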
     213             : 
     214             : /* RFC6582 NewReno recovery for non-SACK connection. It simply retransmits
     215             :  * the next unacked packet upon receiving
     216             :  * a) three or more DUPACKs to start the fast recovery
     217             :  * b) an ACK acknowledging new data during the fast recovery.
     218             :  */
     219           0 : void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced)
     220             : {
     221           0 :         const u8 state = inet_csk(sk)->icsk_ca_state;
     222           0 :         struct tcp_sock *tp = tcp_sk(sk);
     223             : 
     224           0 :         if ((state < TCP_CA_Recovery && tp->sacked_out >= tp->reordering) ||
     225           0 :             (state == TCP_CA_Recovery && snd_una_advanced)) {
     226           0 :                 struct sk_buff *skb = tcp_rtx_queue_head(sk);
     227           0 :                 u32 mss;
     228             : 
     229           0 :                 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
     230             :                         return;
     231             : 
     232           0 :                 mss = tcp_skb_mss(skb);
     233           0 :                 if (tcp_skb_pcount(skb) > 1 && skb->len > mss)
     234           0 :                         tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
     235             :                                      mss, mss, GFP_ATOMIC);
     236             : 
     237           0 :                 tcp_mark_skb_lost(sk, skb);
     238             :         }
     239             : }
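
/* Illustrative sketch, not part of tcp_recovery.c: when the NewReno path
 * above fires, with made-up numbers. With the default dupthresh-style
 * reordering degree of 3, the third DUPACK (sacked_out == 3) marks the head
 * of the retransmit queue lost; once in recovery, each partial ACK that
 * advances snd_una marks the new head lost as well.
 */
static inline bool newreno_example_should_mark_head(u8 ca_state, u32 sacked_out,
						    u32 reordering,
						    bool snd_una_advanced)
{
	return (ca_state < TCP_CA_Recovery && sacked_out >= reordering) ||
	       (ca_state == TCP_CA_Recovery && snd_una_advanced);
}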
