1 // SPDX-License-Identifier: GPL-2.0
2 /* RTT/RTO calculation.
3 *
4 * Adapted from TCP for AF_RXRPC by David Howells ([email protected])
5 *
6 * https://tools.ietf.org/html/rfc6298
7 * https://tools.ietf.org/html/rfc1122#section-4.2.3.1
8 * http://ccr.sigcomm.org/archive/1995/jan95/ccr-9501-partridge87.pdf
9 */
10
11 #include <linux/net.h>
12 #include "ar-internal.h"
13
14 #define RXRPC_RTO_MAX (120 * USEC_PER_SEC)
15 #define RXRPC_TIMEOUT_INIT ((unsigned int)(1 * USEC_PER_SEC)) /* RFC6298 2.1 initial RTO value */
16 #define rxrpc_jiffies32 ((u32)jiffies) /* As rxrpc_jiffies32 */
17
rxrpc_rto_min_us(struct rxrpc_call * call)18 static u32 rxrpc_rto_min_us(struct rxrpc_call *call)
19 {
20 return 200;
21 }
22
__rxrpc_set_rto(const struct rxrpc_call * call)23 static u32 __rxrpc_set_rto(const struct rxrpc_call *call)
24 {
25 return (call->srtt_us >> 3) + call->rttvar_us;
26 }
27
/*
 * Bound the calculated RTO value.
 *
 * NOTE(review): the clamp() argument order looks suspicious.  clamp(val,
 * lo, hi) takes the value first, so this clamps the constant 200000 into
 * [rto + 100000, RXRPC_RTO_MAX], i.e. it evaluates to
 * min(max(rto + 100000, 200000), RXRPC_RTO_MAX).  If the intent was simply
 * to bound rto to [some floor, RXRPC_RTO_MAX], confirm the +100000 slack
 * and the argument placement against the algorithm this was adapted from.
 */
static u32 rxrpc_bound_rto(u32 rto)
{
	return clamp(200000, rto + 100000, RXRPC_RTO_MAX);
}
32
33 /*
34 * Called to compute a smoothed rtt estimate. The data fed to this
35 * routine either comes from timestamps, or from segments that were
36 * known _not_ to have been retransmitted [see Karn/Partridge
37 * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
38 * piece by Van Jacobson.
39 * NOTE: the next three routines used to be one big routine.
40 * To save cycles in the RFC 1323 implementation it was better to break
41 * it up into three procedures. -- erics
42 */
rxrpc_rtt_estimator(struct rxrpc_call * call,long sample_rtt_us)43 static void rxrpc_rtt_estimator(struct rxrpc_call *call, long sample_rtt_us)
44 {
45 long m = sample_rtt_us; /* RTT */
46 u32 srtt = call->srtt_us;
47
48 /* The following amusing code comes from Jacobson's
49 * article in SIGCOMM '88. Note that rtt and mdev
50 * are scaled versions of rtt and mean deviation.
51 * This is designed to be as fast as possible
52 * m stands for "measurement".
53 *
54 * On a 1990 paper the rto value is changed to:
55 * RTO = rtt + 4 * mdev
56 *
57 * Funny. This algorithm seems to be very broken.
58 * These formulae increase RTO, when it should be decreased, increase
59 * too slowly, when it should be increased quickly, decrease too quickly
60 * etc. I guess in BSD RTO takes ONE value, so that it is absolutely
61 * does not matter how to _calculate_ it. Seems, it was trap
62 * that VJ failed to avoid. 8)
63 */
64 if (srtt != 0) {
65 m -= (srtt >> 3); /* m is now error in rtt est */
66 srtt += m; /* rtt = 7/8 rtt + 1/8 new */
67 if (m < 0) {
68 m = -m; /* m is now abs(error) */
69 m -= (call->mdev_us >> 2); /* similar update on mdev */
70 /* This is similar to one of Eifel findings.
71 * Eifel blocks mdev updates when rtt decreases.
72 * This solution is a bit different: we use finer gain
73 * for mdev in this case (alpha*beta).
74 * Like Eifel it also prevents growth of rto,
75 * but also it limits too fast rto decreases,
76 * happening in pure Eifel.
77 */
78 if (m > 0)
79 m >>= 3;
80 } else {
81 m -= (call->mdev_us >> 2); /* similar update on mdev */
82 }
83
84 call->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */
85 if (call->mdev_us > call->mdev_max_us) {
86 call->mdev_max_us = call->mdev_us;
87 if (call->mdev_max_us > call->rttvar_us)
88 call->rttvar_us = call->mdev_max_us;
89 }
90 } else {
91 /* no previous measure. */
92 srtt = m << 3; /* take the measured time to be rtt */
93 call->mdev_us = m << 1; /* make sure rto = 3*rtt */
94 call->rttvar_us = umax(call->mdev_us, rxrpc_rto_min_us(call));
95 call->mdev_max_us = call->rttvar_us;
96 }
97
98 call->srtt_us = umax(srtt, 1);
99 }
100
101 /*
102 * Calculate rto without backoff. This is the second half of Van Jacobson's
103 * routine referred to above.
104 */
rxrpc_set_rto(struct rxrpc_call * call)105 static void rxrpc_set_rto(struct rxrpc_call *call)
106 {
107 u32 rto;
108
109 /* 1. If rtt variance happened to be less 50msec, it is hallucination.
110 * It cannot be less due to utterly erratic ACK generation made
111 * at least by solaris and freebsd. "Erratic ACKs" has _nothing_
112 * to do with delayed acks, because at cwnd>2 true delack timeout
113 * is invisible. Actually, Linux-2.4 also generates erratic
114 * ACKs in some circumstances.
115 */
116 rto = __rxrpc_set_rto(call);
117
118 /* 2. Fixups made earlier cannot be right.
119 * If we do not estimate RTO correctly without them,
120 * all the algo is pure shit and should be replaced
121 * with correct one. It is exactly, which we pretend to do.
122 */
123
124 /* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo
125 * guarantees that rto is higher.
126 */
127 call->rto_us = rxrpc_bound_rto(rto);
128 }
129
static void rxrpc_update_rtt_min(struct rxrpc_call *call, ktime_t resp_time, long rtt_us)
{
	/* Window size in approx-usec (ns / 1024), cf. ipv4.sysctl_tcp_min_rtt_wlen.
	 * NOTE(review): 5ULL * NSEC_PER_SEC / 1024 is ~5 *seconds* expressed
	 * in approx-usec, not the 5 minutes the original comment claimed -
	 * confirm which window length is actually intended.
	 */
	u32 wlen_us = 5ULL * NSEC_PER_SEC / 1024;

	/* Fold this sample into the windowed running minimum.  resp_time is
	 * divided by 1024 to put it on the same approx-usec timebase as
	 * wlen_us; a zero rtt_us sample is substituted with one jiffy's
	 * worth of usec so the recorded minimum is never zero.
	 */
	minmax_running_min(&call->min_rtt, wlen_us, resp_time / 1024,
			   (u32)rtt_us ? : jiffies_to_usecs(1));
}
138
/*
 * Absorb one RTT sample: feed it into the RACK minimum tracker and the
 * Jacobson estimator, recompute the RTO and clear the backoff.  A negative
 * sample indicates no valid measurement and is ignored.
 */
static void rxrpc_ack_update_rtt(struct rxrpc_call *call, ktime_t resp_time, long rtt_us)
{
	if (rtt_us >= 0) {
		/* Update RACK min RTT [RFC8985 6.1 Step 1]. */
		rxrpc_update_rtt_min(call, resp_time, rtt_us);

		rxrpc_rtt_estimator(call, rtt_us);
		rxrpc_set_rto(call);

		/* Only reset backoff on valid RTT measurement [RFC6298]. */
		call->backoff = 0;
	}
}
153
154 /*
155 * Add RTT information to cache. This is called in softirq mode and has
156 * exclusive access to the call RTT data.
157 */
rxrpc_call_add_rtt(struct rxrpc_call * call,enum rxrpc_rtt_rx_trace why,int rtt_slot,rxrpc_serial_t send_serial,rxrpc_serial_t resp_serial,ktime_t send_time,ktime_t resp_time)158 void rxrpc_call_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
159 int rtt_slot,
160 rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
161 ktime_t send_time, ktime_t resp_time)
162 {
163 s64 rtt_us;
164
165 rtt_us = ktime_to_us(ktime_sub(resp_time, send_time));
166 if (rtt_us < 0)
167 return;
168
169 rxrpc_ack_update_rtt(call, resp_time, rtt_us);
170 if (call->rtt_count < 3)
171 call->rtt_count++;
172 call->rtt_taken++;
173
174 WRITE_ONCE(call->peer->recent_srtt_us, call->srtt_us / 8);
175 WRITE_ONCE(call->peer->recent_rto_us, call->rto_us);
176
177 trace_rxrpc_rtt_rx(call, why, rtt_slot, send_serial, resp_serial,
178 rtt_us, call->srtt_us, call->rto_us);
179 }
180
181 /*
182 * Get the retransmission timeout to set in nanoseconds, backing it off each
183 * time we retransmit.
184 */
rxrpc_get_rto_backoff(struct rxrpc_call * call,bool retrans)185 ktime_t rxrpc_get_rto_backoff(struct rxrpc_call *call, bool retrans)
186 {
187 u64 timo_us;
188 u32 backoff = READ_ONCE(call->backoff);
189
190 timo_us = call->rto_us;
191 timo_us <<= backoff;
192 if (retrans && timo_us * 2 <= RXRPC_RTO_MAX)
193 WRITE_ONCE(call->backoff, backoff + 1);
194
195 if (timo_us < 1)
196 timo_us = 1;
197
198 return ns_to_ktime(timo_us * NSEC_PER_USEC);
199 }
200
/*
 * Initialise a call's RTT/RTO state: no request timestamped yet, the
 * RFC6298 2.1 initial RTO (1s) for both rto_us and mdev_us, and no backoff.
 */
void rxrpc_call_init_rtt(struct rxrpc_call *call)
{
	call->rtt_last_req = KTIME_MIN;
	call->rto_us = RXRPC_TIMEOUT_INIT;
	call->mdev_us = RXRPC_TIMEOUT_INIT;
	call->backoff = 0;
	/* NOTE(review): min_rtt is not reset here; the commented-out
	 * minmax_reset() below suggests it once was.  Confirm that
	 * minmax_running_min() in rxrpc_update_rtt_min() is relied upon to
	 * (re)seed the window instead, or remove this dead line.
	 */
	//minmax_reset(&call->rtt_min, rxrpc_jiffies32, ~0U);
}
209