// SPDX-License-Identifier: GPL-2.0-or-later
/* QUIC kernel implementation
 * (C) Copyright Red Hat Corp. 2023
 *
 * This file is part of the QUIC kernel implementation
 *
 * Congestion control (NEW RENO, and CUBIC with HyStart++) for QUIC protocol support.
 *
 * Written or modified by:
 *    Xin Long <lucien.xin@gmail.com>
 */

#include <linux/jiffies.h>
#include <linux/quic.h>
#include <net/sock.h>

#include "common.h"
#include "cong.h"

/* CUBIC APIs */
struct quic_cubic {
	/* Variables of Interest in rfc9438#section-4.1.2 */
	u32 pending_w_add;	/* Accumulates fractional increments to W_est */
	u32 origin_point;	/* W_max */
	u32 epoch_start;	/* t_epoch */
	u32 pending_add;	/* Accumulates fractional additions to W_cubic */
	u32 w_last_max;		/* last W_max */
	u32 w_tcp;		/* W_est */
	u64 k;			/* K */

	/* HyStart++ variables in rfc9406#section-4.2 */
	u32 current_round_min_rtt;	/* currentRoundMinRTT */
	u32 css_baseline_min_rtt;	/* cssBaselineMinRtt */
	u32 last_round_min_rtt;		/* lastRoundMinRTT */
	u16 rtt_sample_count;		/* rttSampleCount */
	u16 css_rounds;	/* Counter for consecutive rounds showing RTT increase */
	s64 window_end;	/* End of current CSS round (packet number) */
};

/* HyStart++ constants in rfc9406#section-4.3 */
#define QUIC_HS_MIN_SSTHRESH		16
#define QUIC_HS_N_RTT_SAMPLE		8
#define QUIC_HS_MIN_ETA			4000
#define QUIC_HS_MAX_ETA			16000
#define QUIC_HS_MIN_RTT_DIVISOR		8
#define QUIC_HS_CSS_GROWTH_DIVISOR	4
#define QUIC_HS_CSS_ROUNDS		5
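/* Time constants are in microseconds: MIN_ETA/MAX_ETA clamp the HyStart++
 * RTT-increase threshold to the 4-16 ms range from rfc9406. Illustrative
 * numbers: with lastRoundMinRTT = 50000 us, the raw threshold is
 * 50000 / QUIC_HS_MIN_RTT_DIVISOR = 6250 us, which already lies inside
 * [QUIC_HS_MIN_ETA, QUIC_HS_MAX_ETA] and is used as is.
 */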
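/* Integer cube root via Newton-Raphson iteration: start from a power of two
 * just above cbrt(n), derived from the bit length of n, then refine with
 * a = (2 * a + n / a^2) / 3 while a^3 > n.
 *
 * Illustrative numbers: cubic_root(1000) starts from a = 16, which the
 * iteration refines to 11 and then 10; since 10^3 = 1000 <= 1000, 10 is
 * returned.
 */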
static u64 cubic_root(u64 n)
{
	u64 a, d;

	if (!n)
		return 0;

	d = (64 - __builtin_clzll(n)) / 3;
	a = BIT_ULL(d + 1);

	for (; a * a * a > n;) {
		d = div64_ul(n, a * a);
		a = div64_ul(2 * a + d, 3);
	}
	return a;
}

/* rfc9406#section-4: HyStart++ Algorithm */
static void cubic_slow_start(struct quic_cong *cong, u32 bytes, s64 number)
{
	struct quic_cubic *cubic = quic_cong_priv(cong);
	u32 eta;

	if (cubic->window_end <= number)
		cubic->window_end = -1;

	/* cwnd = cwnd + (min(N, L * SMSS) / CSS_GROWTH_DIVISOR) */
	if (cubic->css_baseline_min_rtt != U32_MAX)
		bytes = bytes / QUIC_HS_CSS_GROWTH_DIVISOR;
	cong->window = min_t(u32, cong->window + bytes, cong->max_window);

	if (cubic->css_baseline_min_rtt != U32_MAX) {
		/* If CSS_ROUNDS rounds are complete, enter congestion avoidance. */
		if (++cubic->css_rounds > QUIC_HS_CSS_ROUNDS) {
			cubic->css_baseline_min_rtt = U32_MAX;
			cubic->w_last_max = cong->window;
			cong->ssthresh = cong->window;
			cubic->css_rounds = 0;
		}
		return;
	}

	/* if ((rttSampleCount >= N_RTT_SAMPLE) AND
	 *     (currentRoundMinRTT != infinity) AND
	 *     (lastRoundMinRTT != infinity))
	 *	RttThresh = max(MIN_RTT_THRESH,
	 *			min(lastRoundMinRTT / MIN_RTT_DIVISOR, MAX_RTT_THRESH))
	 *	if (currentRoundMinRTT >= (lastRoundMinRTT + RttThresh))
	 *		cssBaselineMinRtt = currentRoundMinRTT
	 *		exit slow start and enter CSS
	 */
	if (cubic->last_round_min_rtt != U32_MAX &&
	    cubic->current_round_min_rtt != U32_MAX &&
	    cong->window >= QUIC_HS_MIN_SSTHRESH * cong->mss &&
	    cubic->rtt_sample_count >= QUIC_HS_N_RTT_SAMPLE) {
		eta = cubic->last_round_min_rtt / QUIC_HS_MIN_RTT_DIVISOR;
		if (eta < QUIC_HS_MIN_ETA)
			eta = QUIC_HS_MIN_ETA;
		else if (eta > QUIC_HS_MAX_ETA)
			eta = QUIC_HS_MAX_ETA;

		pr_debug("%s: current_round_min_rtt: %u, last_round_min_rtt: %u, eta: %u\n",
			 __func__, cubic->current_round_min_rtt, cubic->last_round_min_rtt, eta);

		/* A delay increase triggers exit from slow start into CSS. */
		if (cubic->current_round_min_rtt >= cubic->last_round_min_rtt + eta)
			cubic->css_baseline_min_rtt = cubic->current_round_min_rtt;
	}
}

/* rfc9438#section-4: CUBIC Congestion Control */
static void cubic_cong_avoid(struct quic_cong *cong, u32 bytes)
{
	struct quic_cubic *cubic = quic_cong_priv(cong);
	u64 tx, kx, time_delta, delta, t;
	u64 target_add, tcp_add = 0;
	u64 target, cwnd_thres, m;

	if (cubic->epoch_start == U32_MAX) {
		cubic->epoch_start = cong->time;
		if (cong->window < cubic->w_last_max) {
			/*
			 *          ┌────────────────────┐
			 *         3│ W    - cwnd
			 *          │  max        epoch
			 * K =     ╲│ ────────────────────
			 *          │          C
			 */
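			/* Illustrative numbers: with w_last_max - window =
			 * 100000 bytes, mss = 1000 and C = 0.4 (the 4/10
			 * factor below), K = cubic_root(100000 * 10 /
			 * (1000 * 4)) = cubic_root(250) = 6 seconds.
			 */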
			cubic->k = cubic->w_last_max - cong->window;
			cubic->k = cubic_root(div64_ul(cubic->k * 10, (u64)cong->mss * 4));
			cubic->origin_point = cubic->w_last_max;
		} else {
			cubic->k = 0;
			cubic->origin_point = cong->window;
		}
		cubic->w_tcp = cong->window;
		cubic->pending_add = 0;
		cubic->pending_w_add = 0;
	}

	/*
	 * t = t        - t
	 *      current    epoch
	 */
	t = cong->time - cubic->epoch_start;
	tx = div64_ul(t << 10, USEC_PER_SEC);
	kx = (cubic->k << 10);
	if (tx > kx)
		time_delta = tx - kx;
	else
		time_delta = kx - tx;
	/*
	 *                    3
	 * W     (t) = C * (t - K)  + W
	 *  cubic                      max
	 */
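	/* tx and kx hold seconds in Q10 fixed point (1 s = 1024), so
	 * time_delta is |t - K| in Q10 seconds. Illustrative numbers: with
	 * mss = 1000 and t - K = 2 s (time_delta = 2048), the two >> 10
	 * shifts reduce time_delta^3 to 2^3 = 8, and the 4/10 factor
	 * (C = 0.4) gives delta = 0.4 * 1000 * 8 = 3200 bytes.
	 */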
	delta = cong->mss * ((((time_delta * time_delta) >> 10) * time_delta) >> 10);
	delta = div64_ul(delta * 4, 10) >> 10;
	if (tx > kx)
		target = cubic->origin_point + delta;
	else
		target = cubic->origin_point - delta;

	/*
	 * W     (t + RTT)
	 *  cubic
	 */
	cwnd_thres = (div64_ul((t + cong->smoothed_rtt) << 10, USEC_PER_SEC) * target) >> 10;
	pr_debug("%s: tgt: %llu, thres: %llu, delta: %llu, t: %llu, srtt: %u, tx: %llu, kx: %llu\n",
		 __func__, target, cwnd_thres, delta, t, cong->smoothed_rtt, tx, kx);
	/*
	 *          ⎧cwnd            if W     (t + RTT) < cwnd
	 *          ⎪                    cubic
	 * target = ⎨1.5 * cwnd      if W     (t + RTT) > 1.5 * cwnd
	 *          ⎪                    cubic
	 *          ⎩W     (t + RTT) otherwise
	 *             cubic
	 */
	if (cwnd_thres < cong->window)
		target = cong->window;
	else if (cwnd_thres * 2 > (u64)cong->window * 3)
		target = cong->window * 3 / 2;
	else
		target = cwnd_thres;

	/*
	 * target - cwnd
	 * ─────────────
	 *      cwnd
	 */
	if (target > cong->window) {
		target_add = cubic->pending_add + cong->mss * (target - cong->window);
		cubic->pending_add = do_div(target_add, cong->window);
	} else {
		target_add = cubic->pending_add + cong->mss;
		cubic->pending_add = do_div(target_add, 100 * cong->window);
	}

	pr_debug("%s: target: %llu, window: %u, target_add: %llu\n",
		 __func__, target, cong->window, target_add);

	/*
	 *                       segments_acked
	 * W    = W    + α     * ──────────────
	 *  est    est    cubic       cwnd
	 */
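	/* α_cubic is effectively 1 here: W_est grows by one MSS per cwnd
	 * worth of acked bytes, i.e. classic Reno additive increase, with
	 * pending_w_add carrying the remainder so fractional growth is not
	 * lost between ACKs.
	 */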
	m = cubic->pending_w_add + cong->mss * bytes;
	cubic->pending_w_add = do_div(m, cong->window);
	cubic->w_tcp += m;

	if (cubic->w_tcp > cong->window)
		tcp_add = div64_ul((u64)cong->mss * (cubic->w_tcp - cong->window), cong->window);

	pr_debug("%s: w_tcp: %u, window: %u, tcp_add: %llu\n",
		 __func__, cubic->w_tcp, cong->window, tcp_add);

	/* Take W_cubic(t) or W_est, whichever is bigger. */
	cong->window += max(tcp_add, target_add);
}

static void cubic_recovery(struct quic_cong *cong)
{
	struct quic_cubic *cubic = quic_cong_priv(cong);

	cong->recovery_time = cong->time;
	cubic->epoch_start = U32_MAX;

	/* rfc9438#section-3.4:
	 *   CUBIC sets the multiplicative window decrease factor (β_cubic) to 0.7,
	 *   whereas Reno uses 0.5.
	 *
	 * rfc9438#section-4.6:
	 *   ssthresh = flight_size * β         (new ssthresh)
	 *                             cubic
	 *
	 *   Some implementations of CUBIC currently use cwnd instead of flight_size
	 *   when calculating a new ssthresh.
	 *
	 * rfc9438#section-4.7:
	 *
	 *          ⎧         1 + β
	 *          ⎪              cubic
	 *          ⎪cwnd * ──────────────  if cwnd < W    and fast convergence
	 *   W    = ⎨             2                    max
	 *    max   ⎪                       enabled, further reduce W
	 *          ⎪                                                max
	 *          ⎩cwnd                   otherwise, remember cwnd before reduction
	 */
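	/* Illustrative numbers: with cwnd = 100000 bytes and fast convergence
	 * applying (cwnd < w_last_max), W_max is remembered as
	 * 100000 * 1.7 / 2 = 85000 (the 17 / 10 / 2 below), and cwnd and
	 * ssthresh drop to 100000 * 0.7 = 70000 (β_cubic = 7 / 10).
	 */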
	if (cong->window < cubic->w_last_max)
		cubic->w_last_max = cong->window * 17 / 10 / 2;
	else
		cubic->w_last_max = cong->window;

	cong->ssthresh = cong->window * 7 / 10;
	cong->ssthresh = max(cong->ssthresh, cong->min_window);
	cong->window = cong->ssthresh;
}

static int quic_cong_check_persistent_congestion(struct quic_cong *cong, u32 time)
{
	u32 ssthresh;

	/* rfc9002#section-7.6.1:
	 *   (smoothed_rtt + max(4*rttvar, kGranularity) + max_ack_delay) *
	 *   kPersistentCongestionThreshold
	 */
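	/* Illustrative numbers, assuming the rfc9002 defaults of 1 ms for
	 * kGranularity and 3 for kPersistentCongestionThreshold: with
	 * smoothed_rtt = 50000 us, rttvar = 5000 us and max_ack_delay =
	 * 25000 us, the threshold is (50000 + 20000 + 25000) * 3 = 285000 us;
	 * a loss span longer than that collapses cwnd to min_window.
	 */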
	ssthresh = cong->smoothed_rtt + max(4 * cong->rttvar, QUIC_KGRANULARITY);
	ssthresh = (ssthresh + cong->max_ack_delay) * QUIC_KPERSISTENT_CONGESTION_THRESHOLD;
	if (cong->time - time <= ssthresh)
		return 0;

	pr_debug("%s: persistent congestion, cwnd: %u, ssthresh: %u\n",
		 __func__, cong->window, cong->ssthresh);
	cong->min_rtt_valid = 0;
	cong->window = cong->min_window;
	cong->state = QUIC_CONG_SLOW_START;
	return 1;
}

static void quic_cubic_on_packet_lost(struct quic_cong *cong, u32 time, u32 bytes, s64 number)
{
	if (quic_cong_check_persistent_congestion(cong, time))
		return;

	switch (cong->state) {
	case QUIC_CONG_SLOW_START:
		pr_debug("%s: slow_start -> recovery, cwnd: %u, ssthresh: %u\n",
			 __func__, cong->window, cong->ssthresh);
		break;
	case QUIC_CONG_RECOVERY_PERIOD:
		return;
	case QUIC_CONG_CONGESTION_AVOIDANCE:
		pr_debug("%s: cong_avoid -> recovery, cwnd: %u, ssthresh: %u\n",
			 __func__, cong->window, cong->ssthresh);
		break;
	default:
		pr_debug("%s: wrong congestion state: %d\n", __func__, cong->state);
		return;
	}

	cong->state = QUIC_CONG_RECOVERY_PERIOD;
	cubic_recovery(cong);
}

static void quic_cubic_on_packet_acked(struct quic_cong *cong, u32 time, u32 bytes, s64 number)
{
	switch (cong->state) {
	case QUIC_CONG_SLOW_START:
		cubic_slow_start(cong, bytes, number);
		if (cong->window >= cong->ssthresh) {
			cong->state = QUIC_CONG_CONGESTION_AVOIDANCE;
			pr_debug("%s: slow_start -> cong_avoid, cwnd: %u, ssthresh: %u\n",
				 __func__, cong->window, cong->ssthresh);
		}
		break;
	case QUIC_CONG_RECOVERY_PERIOD:
		if (cong->recovery_time < time) {
			cong->state = QUIC_CONG_CONGESTION_AVOIDANCE;
			pr_debug("%s: recovery -> cong_avoid, cwnd: %u, ssthresh: %u\n",
				 __func__, cong->window, cong->ssthresh);
		}
		break;
	case QUIC_CONG_CONGESTION_AVOIDANCE:
		cubic_cong_avoid(cong, bytes);
		break;
	default:
		pr_debug("%s: wrong congestion state: %d\n", __func__, cong->state);
		return;
	}
}

static void quic_cubic_on_process_ecn(struct quic_cong *cong)
{
	switch (cong->state) {
	case QUIC_CONG_SLOW_START:
		pr_debug("%s: slow_start -> recovery, cwnd: %u, ssthresh: %u\n",
			 __func__, cong->window, cong->ssthresh);
		break;
	case QUIC_CONG_RECOVERY_PERIOD:
		return;
	case QUIC_CONG_CONGESTION_AVOIDANCE:
		pr_debug("%s: cong_avoid -> recovery, cwnd: %u, ssthresh: %u\n",
			 __func__, cong->window, cong->ssthresh);
		break;
	default:
		pr_debug("%s: wrong congestion state: %d\n", __func__, cong->state);
		return;
	}

	cong->state = QUIC_CONG_RECOVERY_PERIOD;
	cubic_recovery(cong);
}

static void quic_cubic_on_init(struct quic_cong *cong)
{
	struct quic_cubic *cubic = quic_cong_priv(cong);

	cubic->epoch_start = U32_MAX;
	cubic->origin_point = 0;
	cubic->w_last_max = 0;
	cubic->w_tcp = 0;
	cubic->k = 0;

	cubic->current_round_min_rtt = U32_MAX;
	cubic->css_baseline_min_rtt = U32_MAX;
	cubic->last_round_min_rtt = U32_MAX;
	cubic->rtt_sample_count = 0;
	cubic->window_end = -1;
	cubic->css_rounds = 0;
}

static void quic_cubic_on_packet_sent(struct quic_cong *cong, u32 time, u32 bytes, s64 number)
{
	struct quic_cubic *cubic = quic_cong_priv(cong);

	if (cubic->window_end != -1)
		return;

	/* rfc9406#section-4.2:
	 *   lastRoundMinRTT = currentRoundMinRTT
	 *   currentRoundMinRTT = infinity
	 *   rttSampleCount = 0
	 */
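	/* Start a new HyStart++ round: window_end records the number of the
	 * packet being sent, and the round ends once that packet is acked
	 * (see cubic_slow_start(), which resets window_end to -1).
	 */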
	cubic->window_end = number;
	cubic->last_round_min_rtt = cubic->current_round_min_rtt;
	cubic->current_round_min_rtt = U32_MAX;
	cubic->rtt_sample_count = 0;

	pr_debug("%s: last_round_min_rtt: %u\n", __func__, cubic->last_round_min_rtt);
}

static void quic_cubic_on_rtt_update(struct quic_cong *cong)
{
	struct quic_cubic *cubic = quic_cong_priv(cong);

	if (cubic->window_end == -1)
		return;

	pr_debug("%s: current_round_min_rtt: %u, latest_rtt: %u\n",
		 __func__, cubic->current_round_min_rtt, cong->latest_rtt);

	/* rfc9406#section-4.2:
	 *   currentRoundMinRTT = min(currentRoundMinRTT, currRTT)
	 *   rttSampleCount += 1
	 */
	if (cubic->current_round_min_rtt > cong->latest_rtt) {
		cubic->current_round_min_rtt = cong->latest_rtt;
		if (cubic->current_round_min_rtt < cubic->css_baseline_min_rtt) {
			cubic->css_baseline_min_rtt = U32_MAX;
			cubic->css_rounds = 0;
		}
	}
	cubic->rtt_sample_count++;
}

/* NEW RENO APIs */
static void quic_reno_on_packet_lost(struct quic_cong *cong, u32 time, u32 bytes, s64 number)
{
	if (quic_cong_check_persistent_congestion(cong, time))
		return;

	switch (cong->state) {
	case QUIC_CONG_SLOW_START:
		pr_debug("%s: slow_start -> recovery, cwnd: %u, ssthresh: %u\n",
			 __func__, cong->window, cong->ssthresh);
		break;
	case QUIC_CONG_RECOVERY_PERIOD:
		return;
	case QUIC_CONG_CONGESTION_AVOIDANCE:
		pr_debug("%s: cong_avoid -> recovery, cwnd: %u, ssthresh: %u\n",
			 __func__, cong->window, cong->ssthresh);
		break;
	default:
		pr_debug("%s: wrong congestion state: %d\n", __func__, cong->state);
		return;
	}

	cong->recovery_time = cong->time;
	cong->state = QUIC_CONG_RECOVERY_PERIOD;
	cong->ssthresh = max(cong->window >> 1U, cong->min_window);
	cong->window = cong->ssthresh;
}

static void quic_reno_on_packet_acked(struct quic_cong *cong, u32 time, u32 bytes, s64 number)
{
	switch (cong->state) {
	case QUIC_CONG_SLOW_START:
		cong->window = min_t(u32, cong->window + bytes, cong->max_window);
		if (cong->window >= cong->ssthresh) {
			cong->state = QUIC_CONG_CONGESTION_AVOIDANCE;
			pr_debug("%s: slow_start -> cong_avoid, cwnd: %u, ssthresh: %u\n",
				 __func__, cong->window, cong->ssthresh);
		}
		break;
	case QUIC_CONG_RECOVERY_PERIOD:
		if (cong->recovery_time < time) {
			cong->state = QUIC_CONG_CONGESTION_AVOIDANCE;
			pr_debug("%s: recovery -> cong_avoid, cwnd: %u, ssthresh: %u\n",
				 __func__, cong->window, cong->ssthresh);
		}
		break;
	case QUIC_CONG_CONGESTION_AVOIDANCE:
		cong->window += cong->mss * bytes / cong->window;
		break;
	default:
		pr_debug("%s: wrong congestion state: %d\n", __func__, cong->state);
		return;
	}
}

static void quic_reno_on_process_ecn(struct quic_cong *cong)
{
	switch (cong->state) {
	case QUIC_CONG_SLOW_START:
		pr_debug("%s: slow_start -> recovery, cwnd: %u, ssthresh: %u\n",
			 __func__, cong->window, cong->ssthresh);
		break;
	case QUIC_CONG_RECOVERY_PERIOD:
		return;
	case QUIC_CONG_CONGESTION_AVOIDANCE:
		pr_debug("%s: cong_avoid -> recovery, cwnd: %u, ssthresh: %u\n",
			 __func__, cong->window, cong->ssthresh);
		break;
	default:
		pr_debug("%s: wrong congestion state: %d\n", __func__, cong->state);
		return;
	}

	cong->recovery_time = cong->time;
	cong->state = QUIC_CONG_RECOVERY_PERIOD;
	cong->ssthresh = max(cong->window >> 1U, cong->min_window);
	cong->window = cong->ssthresh;
}

static void quic_reno_on_init(struct quic_cong *cong)
{
}

static struct quic_cong_ops quic_congs[] = {
	{ /* QUIC_CONG_ALG_RENO */
		.on_packet_acked = quic_reno_on_packet_acked,
		.on_packet_lost = quic_reno_on_packet_lost,
		.on_process_ecn = quic_reno_on_process_ecn,
		.on_init = quic_reno_on_init,
	},
	{ /* QUIC_CONG_ALG_CUBIC */
		.on_packet_acked = quic_cubic_on_packet_acked,
		.on_packet_lost = quic_cubic_on_packet_lost,
		.on_process_ecn = quic_cubic_on_process_ecn,
		.on_init = quic_cubic_on_init,
		.on_packet_sent = quic_cubic_on_packet_sent,
		.on_rtt_update = quic_cubic_on_rtt_update,
	},
};
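
/* Reno leaves on_packet_sent and on_rtt_update unset, so the common entry
 * points below only invoke these optional hooks when an algorithm provides
 * them (CUBIC does, for its HyStart++ round tracking).
 */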

/* COMMON APIs */
void quic_cong_on_packet_lost(struct quic_cong *cong, u32 time, u32 bytes, s64 number)
{
	cong->ops->on_packet_lost(cong, time, bytes, number);
}

void quic_cong_on_packet_acked(struct quic_cong *cong, u32 time, u32 bytes, s64 number)
{
	cong->ops->on_packet_acked(cong, time, bytes, number);
}

void quic_cong_on_process_ecn(struct quic_cong *cong)
{
	cong->ops->on_process_ecn(cong);
}

/* Update Probe Timeout (PTO) and loss detection delay based on RTT stats. */
static void quic_cong_pto_update(struct quic_cong *cong)
{
	u32 pto, loss_delay;

	/* rfc9002#section-6.2.1:
	 *   PTO = smoothed_rtt + max(4*rttvar, kGranularity) + max_ack_delay
	 */
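	/* Illustrative numbers, assuming kGranularity = 1 ms as in rfc9002:
	 * with smoothed_rtt = 50000 us, rttvar = 2000 us and max_ack_delay =
	 * 25000 us, PTO = 50000 + max(8000, 1000) + 25000 = 83000 us.
	 */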
	pto = cong->smoothed_rtt + max(4 * cong->rttvar, QUIC_KGRANULARITY);
	cong->pto = pto + cong->max_ack_delay;

	/* rfc9002#section-6.1.2:
	 *   max(kTimeThreshold * max(smoothed_rtt, latest_rtt), kGranularity)
	 */
	loss_delay = QUIC_KTIME_THRESHOLD(max(cong->smoothed_rtt, cong->latest_rtt));
	cong->loss_delay = max(loss_delay, QUIC_KGRANULARITY);

	pr_debug("%s: update pto: %u\n", __func__, pto);
}

/* Update the pacing timestamp after sending 'bytes' bytes.
 *
 * This function tracks when the next packet is allowed to be sent based on the
 * pacing rate.
 */
static void quic_cong_update_pacing_time(struct quic_cong *cong, u32 bytes)
{
	unsigned long rate = READ_ONCE(cong->pacing_rate);
	u64 prior_time, credit, len_ns;

	if (!rate)
		return;

	prior_time = cong->pacing_time;
	cong->pacing_time = max(cong->pacing_time, ktime_get_ns());
	credit = cong->pacing_time - prior_time;

	/* take into account OS jitter */
	len_ns = div64_ul((u64)bytes * NSEC_PER_SEC, rate);
	len_ns -= min_t(u64, len_ns / 2, credit);
580 : }
581 :

/* Compute and update the pacing rate based on the congestion window and
 * smoothed RTT.
 */
static void quic_cong_pace_update(struct quic_cong *cong, u32 bytes, u32 max_rate)
{
	u64 rate;

	/* rate = N * congestion_window / smoothed_rtt */
	rate = (u64)cong->window * USEC_PER_SEC * 2;
	if (likely(cong->smoothed_rtt))
		rate = div64_ul(rate, cong->smoothed_rtt);
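	/* The pacing gain N is 2, a headroom factor so pacing does not
	 * throttle cwnd growth. Illustrative numbers: with window = 140000
	 * bytes and smoothed_rtt = 100000 us, rate = 140000 * 2 / 0.1 s =
	 * 2800000 bytes/s, subject to the max_rate cap below.
	 */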

	WRITE_ONCE(cong->pacing_rate, min_t(u64, rate, max_rate));
	pr_debug("%s: update pacing rate: %u, max rate: %u, srtt: %u\n",
		 __func__, cong->pacing_rate, max_rate, cong->smoothed_rtt);
}

void quic_cong_on_packet_sent(struct quic_cong *cong, u32 time, u32 bytes, s64 number)
{
	if (!bytes)
		return;
	if (cong->ops->on_packet_sent)
		cong->ops->on_packet_sent(cong, time, bytes, number);
	quic_cong_update_pacing_time(cong, bytes);
}

void quic_cong_on_ack_recv(struct quic_cong *cong, u32 bytes, u32 max_rate)
{
	if (!bytes)
		return;
	if (cong->ops->on_ack_recv)
		cong->ops->on_ack_recv(cong, bytes, max_rate);
	quic_cong_pace_update(cong, bytes, max_rate);
}

/* rfc9002#section-5: Estimating the Round-Trip Time */
void quic_cong_rtt_update(struct quic_cong *cong, u32 time, u32 ack_delay)
{
	u32 adjusted_rtt, rttvar_sample;

	/* Ignore the RTT sample if the ACK delay is suspiciously large. */
	if (ack_delay > cong->max_ack_delay * 2)
		return;

	/* rfc9002#section-5.1: latest_rtt = ack_time - send_time_of_largest_acked */
	cong->latest_rtt = cong->time - time;

	/* rfc9002#section-5.2: Estimating min_rtt */
	if (!cong->min_rtt_valid) {
		cong->min_rtt = cong->latest_rtt;
		cong->min_rtt_valid = 1;
	}
	if (cong->min_rtt > cong->latest_rtt)
		cong->min_rtt = cong->latest_rtt;

	if (!cong->is_rtt_set) {
		/* rfc9002#section-5.3:
		 *   smoothed_rtt = latest_rtt
		 *   rttvar = latest_rtt / 2
		 */
		cong->smoothed_rtt = cong->latest_rtt;
		cong->rttvar = cong->smoothed_rtt / 2;
		quic_cong_pto_update(cong);
		cong->is_rtt_set = 1;
		return;
	}

	/* rfc9002#section-5.3:
	 *   adjusted_rtt = latest_rtt
	 *   if (latest_rtt >= min_rtt + ack_delay):
	 *     adjusted_rtt = latest_rtt - ack_delay
	 *   smoothed_rtt = 7/8 * smoothed_rtt + 1/8 * adjusted_rtt
	 *   rttvar_sample = abs(smoothed_rtt - adjusted_rtt)
	 *   rttvar = 3/4 * rttvar + 1/4 * rttvar_sample
	 */
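	/* Illustrative numbers: with smoothed_rtt = 100000 us and
	 * adjusted_rtt = 80000 us, the EWMA gives smoothed_rtt =
	 * (100000 * 7 + 80000) / 8 = 97500 us and rttvar_sample = 17500 us.
	 */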
	adjusted_rtt = cong->latest_rtt;
	if (cong->latest_rtt >= cong->min_rtt + ack_delay)
		adjusted_rtt = cong->latest_rtt - ack_delay;

	cong->smoothed_rtt = (cong->smoothed_rtt * 7 + adjusted_rtt) / 8;
	if (cong->smoothed_rtt >= adjusted_rtt)
		rttvar_sample = cong->smoothed_rtt - adjusted_rtt;
	else
		rttvar_sample = adjusted_rtt - cong->smoothed_rtt;
	cong->rttvar = (cong->rttvar * 3 + rttvar_sample) / 4;
	quic_cong_pto_update(cong);

	if (cong->ops->on_rtt_update)
		cong->ops->on_rtt_update(cong);
}

void quic_cong_set_algo(struct quic_cong *cong, u8 algo)
{
	if (algo >= QUIC_CONG_ALG_MAX)
		algo = QUIC_CONG_ALG_RENO;

	cong->state = QUIC_CONG_SLOW_START;
	cong->ssthresh = U32_MAX;
	cong->ops = &quic_congs[algo];
	cong->ops->on_init(cong);
}

void quic_cong_set_srtt(struct quic_cong *cong, u32 srtt)
{
	/* rfc9002#section-5.3:
	 *   smoothed_rtt = kInitialRtt
	 *   rttvar = kInitialRtt / 2
	 */
	cong->latest_rtt = srtt;
	cong->smoothed_rtt = cong->latest_rtt;
	cong->rttvar = cong->smoothed_rtt / 2;
	quic_cong_pto_update(cong);
}

void quic_cong_init(struct quic_cong *cong)
{
	cong->max_ack_delay = QUIC_DEF_ACK_DELAY;
	cong->max_window = S32_MAX / 2;
	quic_cong_set_algo(cong, QUIC_CONG_ALG_RENO);
	quic_cong_set_srtt(cong, QUIC_RTT_INIT);
}