Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /* QUIC kernel implementation
3 : * (C) Copyright Red Hat Corp. 2023
4 : *
5 : * This file is part of the QUIC kernel implementation
6 : *
7 : * Initialization/cleanup for QUIC protocol support.
8 : *
9 : * Written or modified by:
10 : * Xin Long <lucien.xin@gmail.com>
11 : */
12 :
13 : #include <net/udp_tunnel.h>
14 : #include <linux/quic.h>
15 :
16 : #include "common.h"
17 : #include "family.h"
18 : #include "path.h"
19 :
20 : static int (*quic_path_rcv)(struct sk_buff *skb, u8 err);
21 : static struct workqueue_struct *quic_wq __read_mostly;
22 :
/* Encapsulation receive callback installed on each tunneled UDP socket
 * (see quic_udp_sock_create()).  Tags the skb with QUIC metadata in
 * skb->cb and hands it to the handler registered via quic_path_init().
 *
 * Always returns 0 so the UDP stack treats the packet as consumed.
 */
static int quic_udp_rcv(struct sock *sk, struct sk_buff *skb)
{
	if (skb_linearize(skb))
		return 0;

	memset(skb->cb, 0, sizeof(skb->cb));
	QUIC_SKB_CB(skb)->seqno = -1;	/* sentinel: presumably "no packet number parsed yet" — confirm */
	/* Record where the UDP header sits before the transport header is
	 * advanced past it below.
	 */
	QUIC_SKB_CB(skb)->udph_offset = skb->transport_header;
	QUIC_SKB_CB(skb)->time = jiffies_to_usecs(jiffies);	/* arrival timestamp in usecs */
	skb_set_transport_header(skb, sizeof(struct udphdr));
	quic_path_rcv(skb, 0);
	return 0;
}
36 :
/* Encapsulation error-lookup callback: invoked for ICMP errors destined
 * to the tunneled UDP socket.  Forwards the skb to the registered QUIC
 * handler with the err flag set and propagates its return value.
 */
static int quic_udp_err(struct sock *sk, struct sk_buff *skb)
{
	if (skb_linearize(skb))
		return 0;

	/* Keep the UDP header offset for the handler; unlike the rx path,
	 * skb->cb is not cleared here.
	 */
	QUIC_SKB_CB(skb)->udph_offset = skb->transport_header;
	return quic_path_rcv(skb, 1);
}
45 :
46 760 : static void quic_udp_sock_put_work(struct work_struct *work)
47 : {
48 760 : struct quic_udp_sock *us = container_of(work, struct quic_udp_sock, work);
49 760 : struct quic_hash_head *head;
50 :
51 760 : head = quic_udp_sock_head(sock_net(us->sk), ntohs(us->addr.v4.sin_port));
52 760 : mutex_lock(&head->m_lock);
53 760 : __hlist_del(&us->node);
54 760 : udp_tunnel_sock_release(us->sk->sk_socket);
55 759 : mutex_unlock(&head->m_lock);
56 760 : kfree(us);
57 760 : }
58 :
59 761 : static struct quic_udp_sock *quic_udp_sock_create(struct sock *sk, union quic_addr *a)
60 : {
61 761 : struct udp_tunnel_sock_cfg tuncfg = {};
62 761 : struct udp_port_cfg udp_conf = {};
63 761 : struct net *net = sock_net(sk);
64 761 : struct quic_hash_head *head;
65 761 : struct quic_udp_sock *us;
66 761 : struct socket *sock;
67 :
68 761 : us = kzalloc(sizeof(*us), GFP_KERNEL);
69 761 : if (!us)
70 : return NULL;
71 :
72 761 : quic_udp_conf_init(sk, &udp_conf, a);
73 761 : if (udp_sock_create(net, &udp_conf, &sock)) {
74 0 : pr_debug("%s: failed to create udp sock\n", __func__);
75 0 : kfree(us);
76 0 : return NULL;
77 : }
78 :
79 761 : tuncfg.encap_type = 1;
80 761 : tuncfg.encap_rcv = quic_udp_rcv;
81 761 : tuncfg.encap_err_lookup = quic_udp_err;
82 761 : setup_udp_tunnel_sock(net, sock, &tuncfg);
83 :
84 761 : refcount_set(&us->refcnt, 1);
85 761 : us->sk = sock->sk;
86 1522 : memcpy(&us->addr, a, sizeof(*a));
87 :
88 761 : head = quic_udp_sock_head(net, ntohs(a->v4.sin_port));
89 761 : hlist_add_head(&us->node, &head->head);
90 761 : INIT_WORK(&us->work, quic_udp_sock_put_work);
91 :
92 761 : return us;
93 : }
94 :
95 581 : static bool quic_udp_sock_get(struct quic_udp_sock *us)
96 : {
97 581 : return (us && refcount_inc_not_zero(&us->refcnt));
98 : }
99 :
100 3498 : static void quic_udp_sock_put(struct quic_udp_sock *us)
101 : {
102 3498 : if (us && refcount_dec_and_test(&us->refcnt))
103 760 : queue_work(quic_wq, &us->work);
104 3498 : }
105 :
106 : /* Lookup a quic_udp_sock in the global hash table. If not found, creates and returns a new one
107 : * associated with the given kernel socket.
108 : */
109 1181 : static struct quic_udp_sock *quic_udp_sock_lookup(struct sock *sk, union quic_addr *a, u16 port)
110 : {
111 1181 : struct net *net = sock_net(sk);
112 1181 : struct quic_hash_head *head;
113 1181 : struct quic_udp_sock *us;
114 :
115 1181 : head = quic_udp_sock_head(net, port);
116 2375 : hlist_for_each_entry(us, &head->head, node) {
117 433 : if (net != sock_net(us->sk))
118 10 : continue;
119 423 : if (a) {
120 423 : if (quic_cmp_sk_addr(us->sk, &us->addr, a))
121 420 : return us;
122 3 : continue;
123 : }
124 0 : if (ntohs(us->addr.v4.sin_port) == port)
125 0 : return us;
126 : }
127 : return NULL;
128 : }
129 :
/* Binds a QUIC path to a local port and sets up a UDP socket.
 *
 * If the path's source address already carries a port, look up (or create)
 * the matching quic_udp_sock and take a reference on it.  Otherwise pick an
 * ephemeral port: start from a random point in the local port range and scan
 * forward until a usable port is found, then create a UDP socket bound to it.
 *
 * Returns 0 on success, -EINVAL if socket creation fails, or -EADDRINUSE
 * when the whole ephemeral range has been exhausted.
 */
int quic_path_bind(struct sock *sk, struct quic_path_group *paths, u8 path)
{
	union quic_addr *a = quic_path_saddr(paths, path);
	int rover, low, high, remaining;
	struct net *net = sock_net(sk);
	struct quic_hash_head *head;
	struct quic_udp_sock *us;
	u16 port;

	port = ntohs(a->v4.sin_port);
	if (port) {
		/* Explicit port: share an existing UDP socket bound to this
		 * address when possible, otherwise create one.  The bucket
		 * mutex serializes lookup and insertion.
		 */
		head = quic_udp_sock_head(net, port);
		mutex_lock(&head->m_lock);
		us = quic_udp_sock_lookup(sk, a, port);
		if (!quic_udp_sock_get(us)) {
			us = quic_udp_sock_create(sk, a);
			if (!us) {
				mutex_unlock(&head->m_lock);
				return -EINVAL;
			}
		}
		mutex_unlock(&head->m_lock);

		/* Drop the previously bound socket (if any), install the new one. */
		quic_udp_sock_put(paths->path[path].udp_sk);
		paths->path[path].udp_sk = us;
		return 0;
	}

	/* No port given: probe the local ephemeral port range, starting at a
	 * random offset so concurrent binds spread across the range.
	 */
	inet_get_local_port_range(net, &low, &high);
	remaining = (high - low) + 1;
	rover = (int)(((u64)get_random_u32() * remaining) >> 32) + low;
	do {
		rover++;
		if (rover < low || rover > high)
			rover = low;	/* wrap around at the top of the range */
		port = (u16)rover;
		if (inet_is_local_reserved_port(net, port))
			continue;

		head = quic_udp_sock_head(net, port);
		mutex_lock(&head->m_lock);
		if (quic_udp_sock_lookup(sk, NULL, port)) {
			/* Port already taken by another QUIC socket. */
			mutex_unlock(&head->m_lock);
			cond_resched();
			continue;
		}
		a->v4.sin_port = htons(port);
		us = quic_udp_sock_create(sk, a);
		if (!us) {
			a->v4.sin_port = 0;	/* roll back the tentative port */
			mutex_unlock(&head->m_lock);
			return -EINVAL;
		}
		mutex_unlock(&head->m_lock);

		quic_udp_sock_put(paths->path[path].udp_sk);
		paths->path[path].udp_sk = us;
		/* Local address changed; any cached route is stale. */
		__sk_dst_reset(sk);
		return 0;
	} while (--remaining > 0);

	return -EADDRINUSE;
}
194 :
195 : /* Swaps the active and alternate QUIC paths.
196 : *
197 : * Promotes the alternate path (path[1]) to become the new active path (path[0]). If the
198 : * alternate path has a valid UDP socket, the entire path is swapped. Otherwise, only the
199 : * destination address is exchanged, assuming the source address is the same and no rebind is
200 : * needed.
201 : *
202 : * This is typically used during path migration or alternate path promotion.
203 : */
204 97 : void quic_path_swap(struct quic_path_group *paths)
205 : {
206 97 : struct quic_path path = paths->path[0];
207 :
208 97 : paths->alt_probes = 0;
209 97 : paths->alt_state = QUIC_PATH_ALT_SWAPPED;
210 :
211 97 : if (paths->path[1].udp_sk) {
212 62 : paths->path[0] = paths->path[1];
213 62 : paths->path[1] = path;
214 62 : return;
215 : }
216 :
217 35 : paths->path[0].daddr = paths->path[1].daddr;
218 35 : paths->path[1].daddr = path.daddr;
219 : }
220 :
221 : /* Frees resources associated with a QUIC path.
222 : *
223 : * This is used for cleanup during error handling or when the path is no longer needed.
224 : */
225 2317 : void quic_path_free(struct sock *sk, struct quic_path_group *paths, u8 path)
226 : {
227 2317 : paths->alt_probes = 0;
228 2317 : paths->alt_state = QUIC_PATH_ALT_NONE;
229 :
230 2317 : quic_udp_sock_put(paths->path[path].udp_sk);
231 2317 : paths->path[path].udp_sk = NULL;
232 2317 : memset(quic_path_daddr(paths, path), 0, sizeof(union quic_addr));
233 2317 : memset(quic_path_saddr(paths, path), 0, sizeof(union quic_addr));
234 2317 : }
235 :
/* Detects and records a potential alternate path.
 *
 * If the new source or destination address differs from the active path, and alternate path
 * detection is not disabled, the function updates the alternate path slot (path[1]) with the
 * new addresses.
 *
 * This is typically called on packet receive to detect new possible network paths (e.g., NAT
 * rebinding, mobility).
 *
 * Returns 1 if a new alternate path was detected and updated, 0 otherwise.
 */
int quic_path_detect_alt(struct quic_path_group *paths, union quic_addr *sa, union quic_addr *da,
			 struct sock *sk)
{
	/* A mismatch against the active path's source or destination (with
	 * the corresponding detection enabled) suggests a new path.
	 */
	if ((!quic_cmp_sk_addr(sk, quic_path_saddr(paths, 0), sa) && !paths->disable_saddr_alt) ||
	    (!quic_cmp_sk_addr(sk, quic_path_daddr(paths, 0), da) && !paths->disable_daddr_alt)) {
		/* A zero port marks the alternate slot as unset; claim it. */
		if (!quic_path_saddr(paths, 1)->v4.sin_port)
			quic_path_set_saddr(paths, 1, sa);

		/* If sa doesn't match the recorded alternate source, this
		 * packet belongs to yet another address pair: ignore it.
		 */
		if (!quic_cmp_sk_addr(sk, quic_path_saddr(paths, 1), sa))
			return 0;

		if (!quic_path_daddr(paths, 1)->v4.sin_port)
			quic_path_set_daddr(paths, 1, da);

		/* 1 only when both recorded alternate addresses match. */
		return quic_cmp_sk_addr(sk, quic_path_daddr(paths, 1), da);
	}
	return 0;
}
265 :
266 2079 : void quic_path_get_param(struct quic_path_group *paths, struct quic_transport_param *p)
267 : {
268 2079 : if (p->remote) {
269 24 : p->disable_active_migration = paths->disable_saddr_alt;
270 24 : return;
271 : }
272 2055 : p->disable_active_migration = paths->disable_daddr_alt;
273 : }
274 :
275 3172 : void quic_path_set_param(struct quic_path_group *paths, struct quic_transport_param *p)
276 : {
277 3172 : if (p->remote) {
278 1004 : paths->disable_saddr_alt = p->disable_active_migration;
279 1004 : return;
280 : }
281 2168 : paths->disable_daddr_alt = p->disable_active_migration;
282 : }
283 :
/* State Machine defined in rfc8899#section-5.2 */
enum quic_plpmtud_state {
	QUIC_PL_DISABLED,	/* PLPMTUD not in use on this path */
	QUIC_PL_BASE,		/* confirming the base PLPMTU works */
	QUIC_PL_SEARCH,		/* probing upward for a larger PLPMTU */
	QUIC_PL_COMPLETE,	/* search finished; current PLPMTU validated */
	QUIC_PL_ERROR,		/* base PLPMTU could not be confirmed */
};

#define QUIC_BASE_PLPMTU	1200	/* initial and fallback PLPMTU (see quic_path_pl_reset()) */
#define QUIC_MAX_PLPMTU		9000	/* upper bound for probe_size growth */
#define QUIC_MIN_PLPMTU		512	/* ICMP PTB hints below this are ignored */

#define QUIC_MAX_PROBES		3	/* unacked probes before the size is declared failed */

#define QUIC_PL_BIG_STEP	32	/* probe_size increment during coarse search */
#define QUIC_PL_MIN_STEP	4	/* fine-grained increment once probe_high is known */
301 :
302 : /* Handle PLPMTUD probe failure on a QUIC path.
303 : *
304 : * Called immediately after sending a probe packet in QUIC Path MTU Discovery. Tracks probe
305 : * count and manages state transitions based on the number of probes sent and current PLPMTUD
306 : * state (BASE, SEARCH, COMPLETE, ERROR). Detects probe failures and black holes, adjusting
307 : * PMTU and probe sizes accordingly.
308 : *
309 : * Return: New PMTU value if updated, else 0.
310 : */
u32 quic_path_pl_send(struct quic_path_group *paths, s64 number)
{
	u32 pathmtu = 0;

	/* Remember which packet number carries this probe so a later ACK
	 * can be matched in quic_path_pl_confirm().
	 */
	paths->pl.number = number;
	if (paths->pl.probe_count < QUIC_MAX_PROBES)
		goto out;

	/* QUIC_MAX_PROBES probes went unconfirmed: treat the current probe
	 * size as failed and transition per the state machine.
	 */
	paths->pl.probe_count = 0;
	if (paths->pl.state == QUIC_PL_BASE) {
		if (paths->pl.probe_size == QUIC_BASE_PLPMTU) { /* BASE_PLPMTU Confirming Failed */
			paths->pl.state = QUIC_PL_ERROR; /* Base -> Error */

			paths->pl.pmtu = QUIC_BASE_PLPMTU;
			pathmtu = QUIC_BASE_PLPMTU;
		}
	} else if (paths->pl.state == QUIC_PL_SEARCH) {
		if (paths->pl.pmtu == paths->pl.probe_size) { /* Black Hole Detected */
			paths->pl.state = QUIC_PL_BASE;	/* Search -> Base */
			paths->pl.probe_size = QUIC_BASE_PLPMTU;
			paths->pl.probe_high = 0;

			paths->pl.pmtu = QUIC_BASE_PLPMTU;
			pathmtu = QUIC_BASE_PLPMTU;
		} else { /* Normal probe failure: cap the search at this size
			  * and fall back to probing the validated PMTU.
			  */
			paths->pl.probe_high = paths->pl.probe_size;
			paths->pl.probe_size = paths->pl.pmtu;
		}
	} else if (paths->pl.state == QUIC_PL_COMPLETE) {
		if (paths->pl.pmtu == paths->pl.probe_size) { /* Black Hole Detected */
			paths->pl.state = QUIC_PL_BASE;	/* Search Complete -> Base */
			paths->pl.probe_size = QUIC_BASE_PLPMTU;

			paths->pl.pmtu = QUIC_BASE_PLPMTU;
			pathmtu = QUIC_BASE_PLPMTU;
		}
	}

out:
	pr_debug("%s: dst: %p, state: %d, pmtu: %d, size: %d, high: %d\n", __func__, paths,
		 paths->pl.state, paths->pl.pmtu, paths->pl.probe_size, paths->pl.probe_high);
	/* Count the probe being sent now toward the next failure check. */
	paths->pl.probe_count++;
	return pathmtu;
}
355 :
356 : /* Handle successful reception of a PMTU probe.
357 : *
358 : * Called when a probe packet is acknowledged. Updates probe size and transitions state if
359 : * needed (e.g., from SEARCH to COMPLETE). Expands PMTU using binary or linear search
360 : * depending on state.
361 : *
362 : * Return: New PMTU to apply if search completes, or 0 if no change.
363 : */
u32 quic_path_pl_recv(struct quic_path_group *paths, bool *raise_timer, bool *complete)
{
	u32 pathmtu = 0;

	pr_debug("%s: dst: %p, state: %d, pmtu: %d, size: %d, high: %d\n", __func__, paths,
		 paths->pl.state, paths->pl.pmtu, paths->pl.probe_size, paths->pl.probe_high);

	*raise_timer = false;
	paths->pl.number = 0;	/* probe confirmed; stop matching its number */
	paths->pl.pmtu = paths->pl.probe_size;	/* probed size is now the validated PMTU */
	paths->pl.probe_count = 0;
	if (paths->pl.state == QUIC_PL_BASE) {
		paths->pl.state = QUIC_PL_SEARCH; /* Base -> Search */
		paths->pl.probe_size += QUIC_PL_BIG_STEP;
	} else if (paths->pl.state == QUIC_PL_ERROR) {
		paths->pl.state = QUIC_PL_SEARCH; /* Error -> Search */

		paths->pl.pmtu = paths->pl.probe_size;
		pathmtu = (u32)paths->pl.pmtu;
		paths->pl.probe_size += QUIC_PL_BIG_STEP;
	} else if (paths->pl.state == QUIC_PL_SEARCH) {
		if (!paths->pl.probe_high) {
			/* Coarse phase: grow in big steps until the cap is
			 * hit or a failed probe sets probe_high.
			 */
			if (paths->pl.probe_size < QUIC_MAX_PLPMTU) {
				paths->pl.probe_size =
					(u16)min(paths->pl.probe_size + QUIC_PL_BIG_STEP,
						 QUIC_MAX_PLPMTU);
				*complete = false;
				return pathmtu;
			}
			paths->pl.probe_high = QUIC_MAX_PLPMTU;
		}
		/* Fine phase: approach probe_high in small steps. */
		paths->pl.probe_size += QUIC_PL_MIN_STEP;
		if (paths->pl.probe_size >= paths->pl.probe_high) {
			paths->pl.probe_high = 0;
			paths->pl.state = QUIC_PL_COMPLETE; /* Search -> Search Complete */

			paths->pl.probe_size = paths->pl.pmtu;
			pathmtu = (u32)paths->pl.pmtu;
			*raise_timer = true;
		}
	} else if (paths->pl.state == QUIC_PL_COMPLETE) {
		/* Raise probe_size again after 30 * interval in Search Complete */
		paths->pl.state = QUIC_PL_SEARCH; /* Search Complete -> Search */
		paths->pl.probe_size = (u16)min(paths->pl.probe_size + QUIC_PL_MIN_STEP,
						QUIC_MAX_PLPMTU);
	}

	*complete = (paths->pl.state == QUIC_PL_COMPLETE);
	return pathmtu;
}
414 :
415 : /* Handle ICMP "Packet Too Big" messages.
416 : *
417 : * Responds to an incoming ICMP error by reducing the probe size or falling back to a safe
418 : * baseline PMTU depending on current state. Also handles cases where the PMTU hint lies
419 : * between probe and current PMTU.
420 : *
421 : * Return: New PMTU to apply if state changes, or 0 if no change.
422 : */
u32 quic_path_pl_toobig(struct quic_path_group *paths, u32 pmtu, bool *reset_timer)
{
	u32 pathmtu = 0;

	pr_debug("%s: dst: %p, state: %d, pmtu: %d, size: %d, ptb: %d\n", __func__, paths,
		 paths->pl.state, paths->pl.pmtu, paths->pl.probe_size, pmtu);

	*reset_timer = false;
	/* Ignore PTB hints that are implausibly small or not below what we
	 * are currently probing.
	 */
	if (pmtu < QUIC_MIN_PLPMTU || pmtu >= (u32)paths->pl.probe_size)
		return pathmtu;

	if (paths->pl.state == QUIC_PL_BASE) {
		if (pmtu >= QUIC_MIN_PLPMTU && pmtu < QUIC_BASE_PLPMTU) {
			/* The link cannot carry even the base PLPMTU. */
			paths->pl.state = QUIC_PL_ERROR; /* Base -> Error */

			paths->pl.pmtu = QUIC_BASE_PLPMTU;
			pathmtu = QUIC_BASE_PLPMTU;
		}
	} else if (paths->pl.state == QUIC_PL_SEARCH) {
		if (pmtu >= QUIC_BASE_PLPMTU && pmtu < (u32)paths->pl.pmtu) {
			/* Hint below the validated PMTU: restart from base. */
			paths->pl.state = QUIC_PL_BASE;	/* Search -> Base */
			paths->pl.probe_size = QUIC_BASE_PLPMTU;
			paths->pl.probe_count = 0;

			paths->pl.probe_high = 0;
			paths->pl.pmtu = QUIC_BASE_PLPMTU;
			pathmtu = QUIC_BASE_PLPMTU;
		} else if (pmtu > (u32)paths->pl.pmtu && pmtu < (u32)paths->pl.probe_size) {
			/* Hint lies between PMTU and probe size: probe the
			 * hinted size directly.
			 */
			paths->pl.probe_size = (u16)pmtu;
			paths->pl.probe_count = 0;
		}
	} else if (paths->pl.state == QUIC_PL_COMPLETE) {
		if (pmtu >= QUIC_BASE_PLPMTU && pmtu < (u32)paths->pl.pmtu) {
			paths->pl.state = QUIC_PL_BASE;	/* Complete -> Base */
			paths->pl.probe_size = QUIC_BASE_PLPMTU;
			paths->pl.probe_count = 0;

			paths->pl.probe_high = 0;
			paths->pl.pmtu = QUIC_BASE_PLPMTU;
			pathmtu = QUIC_BASE_PLPMTU;
			*reset_timer = true;
		}
	}
	return pathmtu;
}
468 :
469 : /* Reset PLPMTUD state for a path.
470 : *
471 : * Resets all PLPMTUD-related state to its initial configuration. Called when a new path is
472 : * initialized or when recovering from errors.
473 : */
474 1911 : void quic_path_pl_reset(struct quic_path_group *paths)
475 : {
476 1911 : paths->pl.number = 0;
477 1911 : paths->pl.state = QUIC_PL_BASE;
478 1911 : paths->pl.pmtu = QUIC_BASE_PLPMTU;
479 1911 : paths->pl.probe_size = QUIC_BASE_PLPMTU;
480 1911 : }
481 :
482 : /* Check if a packet number confirms PLPMTUD probe.
483 : *
484 : * Checks whether the last probe (tracked by .number) has been acknowledged. If the probe
485 : * number lies within the ACK range, confirmation is successful.
486 : *
487 : * Return: true if probe is confirmed, false otherwise.
488 : */
489 424632 : bool quic_path_pl_confirm(struct quic_path_group *paths, s64 largest, s64 smallest)
490 : {
491 424632 : return paths->pl.number && paths->pl.number >= smallest && paths->pl.number <= largest;
492 : }
493 :
494 1 : int quic_path_init(int (*rcv)(struct sk_buff *skb, u8 err))
495 : {
496 1 : quic_wq = create_workqueue("quic_workqueue");
497 1 : if (!quic_wq)
498 : return -ENOMEM;
499 :
500 1 : quic_path_rcv = rcv;
501 1 : return 0;
502 : }
503 :
/* Tear down what quic_path_init() set up.  destroy_workqueue() drains the
 * queue first, so any pending quic_udp_sock_put_work() completes before
 * the workqueue memory is released.
 */
void quic_path_destroy(void)
{
	destroy_workqueue(quic_wq);
}
|