Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /* QUIC kernel implementation
3 : * (C) Copyright Red Hat Corp. 2023
4 : *
5 : * This file is part of the QUIC kernel implementation
6 : *
7 : * Initialization/cleanup for QUIC protocol support.
8 : *
9 : * Written or modified by:
10 : * Xin Long <lucien.xin@gmail.com>
11 : */
12 :
13 : #include "socket.h"
14 :
15 : #define QUIC_HLEN 1
16 :
17 : #define QUIC_LONG_HLEN(dcid, scid) \
18 : (QUIC_HLEN + QUIC_VERSION_LEN + 1 + (dcid)->len + 1 + (scid)->len)
19 :
20 : #define QUIC_VERSION_NUM 2
21 :
22 : /* Supported QUIC versions and their compatible versions. Used for Compatible Version
23 : * Negotiation in rfc9368#section-2.3.
24 : */
25 : static u32 quic_versions[QUIC_VERSION_NUM][4] = {
26 : /* Version, Compatible Versions */
27 : { QUIC_VERSION_V1, QUIC_VERSION_V2, QUIC_VERSION_V1, 0 },
28 : { QUIC_VERSION_V2, QUIC_VERSION_V2, QUIC_VERSION_V1, 0 },
29 : };
30 :
31 : /* Get the compatible version list for a given QUIC version. */
32 4173 : u32 *quic_packet_compatible_versions(u32 version)
33 : {
34 4173 : u8 i;
35 :
36 5914 : for (i = 0; i < QUIC_VERSION_NUM; i++)
37 5200 : if (version == quic_versions[i][0])
38 3473 : return quic_versions[i];
39 : return NULL;
40 : }
41 :
42 : /* Convert version-specific type to internal standard packet type. */
43 5521 : static u8 quic_packet_version_get_type(u32 version, u8 type)
44 : {
45 945 : if (version == QUIC_VERSION_V1)
46 : return type;
47 :
48 99 : switch (type) {
49 : case QUIC_PACKET_INITIAL_V2:
50 : return QUIC_PACKET_INITIAL;
51 : case QUIC_PACKET_0RTT_V2:
52 : return QUIC_PACKET_0RTT;
53 : case QUIC_PACKET_HANDSHAKE_V2:
54 : return QUIC_PACKET_HANDSHAKE;
55 : case QUIC_PACKET_RETRY_V2:
56 : return QUIC_PACKET_RETRY;
57 : default:
58 : return -1;
59 : }
60 : return -1;
61 : }
62 :
63 : /* Convert internal standard packet type to version-specific type. */
64 5588 : static u8 quic_packet_version_put_type(u32 version, u8 type)
65 : {
66 5588 : if (version == QUIC_VERSION_V1)
67 : return type;
68 :
69 57 : switch (type) {
70 : case QUIC_PACKET_INITIAL:
71 : return QUIC_PACKET_INITIAL_V2;
72 : case QUIC_PACKET_0RTT:
73 : return QUIC_PACKET_0RTT_V2;
74 : case QUIC_PACKET_HANDSHAKE:
75 : return QUIC_PACKET_HANDSHAKE_V2;
76 : case QUIC_PACKET_RETRY:
77 : return QUIC_PACKET_RETRY_V2;
78 : default:
79 : return -1;
80 : }
81 : return -1;
82 : }
83 :
84 : /* Parse QUIC version and connection IDs (DCID and SCID) from a Long header packet buffer. */
85 6066 : static int quic_packet_get_version_and_connid(struct quic_conn_id *dcid, struct quic_conn_id *scid,
86 : u32 *version, u8 **pp, u32 *plen)
87 : {
88 6066 : u64 len, v;
89 :
90 6066 : *pp += QUIC_HLEN;
91 6066 : *plen -= QUIC_HLEN;
92 :
93 6066 : if (!quic_get_int(pp, plen, &v, QUIC_VERSION_LEN))
94 : return -EINVAL;
95 6066 : *version = v;
96 :
97 6066 : if (!quic_get_int(pp, plen, &len, 1) ||
98 6066 : len > *plen || len > QUIC_CONN_ID_MAX_LEN)
99 : return -EINVAL;
100 6066 : quic_conn_id_update(dcid, *pp, len);
101 6066 : *plen -= len;
102 6066 : *pp += len;
103 :
104 6066 : if (!quic_get_int(pp, plen, &len, 1) ||
105 6066 : len > *plen || len > QUIC_CONN_ID_MAX_LEN)
106 : return -EINVAL;
107 6066 : quic_conn_id_update(scid, *pp, len);
108 6066 : *plen -= len;
109 6066 : *pp += len;
110 6066 : return 0;
111 : }
112 :
113 : /* Change the QUIC version for the connection.
114 : *
115 : * Frees existing initial crypto keys and installs new initial keys compatible with the new
116 : * version.
117 : */
118 23 : static int quic_packet_version_change(struct sock *sk, struct quic_conn_id *dcid, u32 version)
119 : {
120 23 : struct quic_crypto *crypto = quic_crypto(sk, QUIC_CRYPTO_INITIAL);
121 :
122 23 : if (quic_crypto_initial_keys_install(crypto, dcid, version, quic_is_serv(sk)))
123 : return -1;
124 :
125 23 : quic_packet(sk)->version = version;
126 23 : return 0;
127 : }
128 :
129 : /* Select the best compatible QUIC version from offered list.
130 : *
131 : * Considers the local preferred version, currently chosen version, and versions offered by
132 : * the peer. Selects the best compatible version based on client/server role and updates the
133 : * connection version accordingly.
134 : */
135 994 : int quic_packet_select_version(struct sock *sk, u32 *versions, u8 count)
136 : {
137 994 : struct quic_packet *packet = quic_packet(sk);
138 994 : struct quic_config *c = quic_config(sk);
139 994 : u8 i, pref_found = 0, ch_found = 0;
140 994 : u32 preferred, chosen, best = 0;
141 :
142 994 : preferred = c->version ?: QUIC_VERSION_V1;
143 994 : chosen = packet->version;
144 :
145 3241 : for (i = 0; i < count; i++) {
146 4494 : if (!quic_packet_compatible_versions(versions[i]))
147 254 : continue;
148 1993 : if (preferred == versions[i])
149 1708 : pref_found = 1;
150 1993 : if (chosen == versions[i])
151 1726 : ch_found = 1;
152 1993 : if (best < versions[i]) /* Track highest offered version. */
153 : best = versions[i];
154 : }
155 :
156 994 : if (!pref_found && !ch_found && !best)
157 : return -1;
158 :
159 994 : if (quic_is_serv(sk)) { /* Server prefers preferred version if offered, else chosen. */
160 420 : if (pref_found)
161 : best = preferred;
162 0 : else if (ch_found)
163 0 : best = chosen;
164 : } else { /* Client prefers chosen version, else preferred. */
165 574 : if (ch_found)
166 : best = chosen;
167 0 : else if (pref_found)
168 0 : best = preferred;
169 : }
170 :
171 994 : if (packet->version == best)
172 : return 0;
173 :
174 : /* Change to selected best version. */
175 3 : return quic_packet_version_change(sk, &quic_paths(sk)->orig_dcid, best);
176 : }
177 :
178 : /* Extracts a QUIC token from a buffer in the Client Initial packet. */
179 2658 : static int quic_packet_get_token(struct quic_data *token, u8 **pp, u32 *plen)
180 : {
181 2658 : u64 len;
182 :
183 2658 : if (!quic_get_var(pp, plen, &len) || len > *plen)
184 : return -EINVAL;
185 2658 : quic_data(token, *pp, len);
186 2658 : *plen -= len;
187 2658 : *pp += len;
188 2658 : return 0;
189 : }
190 :
191 : /* Process PMTU reduction event on a QUIC socket. */
192 1974 : void quic_packet_rcv_err_pmtu(struct sock *sk)
193 : {
194 1974 : struct quic_path_group *paths = quic_paths(sk);
195 1974 : struct quic_packet *packet = quic_packet(sk);
196 1974 : struct quic_config *c = quic_config(sk);
197 1974 : u32 pathmtu, info, taglen;
198 1974 : struct dst_entry *dst;
199 1974 : bool reset_timer;
200 :
201 1974 : if (!ip_sk_accept_pmtu(sk))
202 0 : return;
203 :
204 1974 : info = clamp(paths->mtu_info, QUIC_PATH_MIN_PMTU, QUIC_PATH_MAX_PMTU);
205 : /* If PLPMTUD is not enabled, update MSS using the route and ICMP info. */
206 1974 : if (!c->plpmtud_probe_interval) {
207 0 : if (quic_packet_route(sk) < 0)
208 : return;
209 :
210 0 : dst = __sk_dst_get(sk);
211 0 : dst->ops->update_pmtu(dst, sk, NULL, info, true);
212 0 : quic_packet_mss_update(sk, info - packet->hlen);
213 : /* Retransmit all outstanding data as MTU may have increased. */
214 0 : quic_outq_retransmit_mark(sk, QUIC_CRYPTO_APP, 1);
215 0 : quic_outq_update_loss_timer(sk);
216 0 : quic_outq_transmit(sk);
217 0 : return;
218 : }
219 : /* PLPMTUD is enabled: adjust to smaller PMTU, subtract headers and AEAD tag. Also
220 : * notify the QUIC path layer for possible state changes and probing.
221 : */
222 1974 : taglen = quic_packet_taglen(packet);
223 1974 : info = info - packet->hlen - taglen;
224 1974 : pathmtu = quic_path_pl_toobig(paths, info, &reset_timer);
225 1974 : if (reset_timer)
226 0 : quic_timer_reset(sk, QUIC_TIMER_PMTU, c->plpmtud_probe_interval);
227 1974 : if (pathmtu)
228 0 : quic_packet_mss_update(sk, pathmtu + taglen);
229 : }
230 :
231 : /* Handle ICMP Toobig packet and update QUIC socket path MTU. */
232 3287 : static int quic_packet_rcv_err(struct sk_buff *skb)
233 : {
234 3287 : union quic_addr daddr, saddr;
235 3287 : struct sock *sk = NULL;
236 3287 : int ret = 0;
237 3287 : u32 info;
238 :
239 : /* All we can do is lookup the matching QUIC socket by addresses. */
240 3287 : quic_get_msg_addrs(&saddr, &daddr, skb);
241 3287 : sk = quic_sock_lookup(skb, &daddr, &saddr, NULL);
242 3287 : if (!sk)
243 : return -ENOENT;
244 :
245 3242 : bh_lock_sock(sk);
246 3242 : if (quic_is_listen(sk))
247 0 : goto out;
248 :
249 3242 : if (quic_get_mtu_info(skb, &info))
250 349 : goto out;
251 :
252 2893 : ret = 1; /* Success: update socket path MTU info. */
253 2893 : quic_paths(sk)->mtu_info = info;
254 2893 : if (sock_owned_by_user(sk)) {
255 : /* Socket is in use by userspace context. Defer MTU processing to later via
256 : * tasklet. Ensure the socket is not dropped before deferral.
257 : */
258 1407 : if (!test_and_set_bit(QUIC_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
259 488 : sock_hold(sk);
260 1407 : goto out;
261 : }
262 : /* Otherwise, process the MTU reduction now. */
263 1486 : quic_packet_rcv_err_pmtu(sk);
264 3242 : out:
265 3242 : bh_unlock_sock(sk);
266 3242 : return ret;
267 : }
268 :
269 : #define TLS_MT_CLIENT_HELLO 1
270 : #define TLS_EXT_alpn 16
271 :
272 : /* TLS Client Hello Msg:
273 : *
274 : * uint16 ProtocolVersion;
275 : * opaque Random[32];
276 : * uint8 CipherSuite[2];
277 : *
278 : * struct {
279 : * ExtensionType extension_type;
280 : * opaque extension_data<0..2^16-1>;
281 : * } Extension;
282 : *
283 : * struct {
284 : * ProtocolVersion legacy_version = 0x0303;
285 : * Random rand;
286 : * opaque legacy_session_id<0..32>;
287 : * CipherSuite cipher_suites<2..2^16-2>;
288 : * opaque legacy_compression_methods<1..2^8-1>;
289 : * Extension extensions<8..2^16-1>;
290 : * } ClientHello;
291 : */
292 :
293 : #define TLS_CH_RANDOM_LEN 32
294 : #define TLS_CH_VERSION_LEN 2
295 :
296 : /* Extract ALPN data from a TLS ClientHello message.
297 : *
298 : * Parses the TLS ClientHello handshake message to find the ALPN (Application Layer Protocol
299 : * Negotiation) TLS extension. It validates the TLS ClientHello structure, including version,
300 : * random, session ID, cipher suites, compression methods, and extensions. Once the ALPN
301 : * extension is found, the ALPN protocols list is extracted and stored in @alpn.
302 : *
303 : * Return: 0 on success or no ALPN found, a negative error code on failed parsing.
304 : */
305 418 : static int quic_packet_get_alpn(struct quic_data *alpn, u8 *p, u32 len)
306 : {
307 418 : int err = -EINVAL, found = 0;
308 418 : u64 length, type;
309 :
310 : /* Verify handshake message type (ClientHello) and its length. */
311 418 : if (!quic_get_int(&p, &len, &type, 1) || type != TLS_MT_CLIENT_HELLO)
312 : return err;
313 418 : if (!quic_get_int(&p, &len, &length, 3) ||
314 418 : length < TLS_CH_RANDOM_LEN + TLS_CH_VERSION_LEN)
315 : return err;
316 418 : if (len > (u32)length) /* Limit len to handshake message length if larger. */
317 0 : len = length;
318 : /* Skip legacy_version (2 bytes) + random (32 bytes). */
319 418 : p += TLS_CH_RANDOM_LEN + TLS_CH_VERSION_LEN;
320 418 : len -= TLS_CH_RANDOM_LEN + TLS_CH_VERSION_LEN;
321 : /* legacy_session_id_len must be zero (QUIC requirement). */
322 418 : if (!quic_get_int(&p, &len, &length, 1) || length)
323 : return err;
324 :
325 : /* Skip cipher_suites (2 bytes length + variable data). */
326 418 : if (!quic_get_int(&p, &len, &length, 2) || length > (u64)len)
327 : return err;
328 418 : len -= length;
329 418 : p += length;
330 :
331 : /* Skip legacy_compression_methods (1 byte length + variable data). */
332 418 : if (!quic_get_int(&p, &len, &length, 1) || length > (u64)len)
333 : return err;
334 418 : len -= length;
335 418 : p += length;
336 :
337 418 : if (!quic_get_int(&p, &len, &length, 2)) /* Read TLS extensions length (2 bytes). */
338 : return err;
339 418 : if (len > (u32)length) /* Limit len to extensions length if larger. */
340 0 : len = length;
341 3130 : while (len > 4) { /* Iterate over extensions to find ALPN (type TLS_EXT_alpn). */
342 3120 : if (!quic_get_int(&p, &len, &type, 2))
343 : break;
344 3120 : if (!quic_get_int(&p, &len, &length, 2))
345 : break;
346 3120 : if (len < (u32)length) /* Incomplete TLS extensions. */
347 : return 0;
348 3074 : if (type == TLS_EXT_alpn) { /* Found ALPN extension. */
349 362 : len = length;
350 362 : found = 1;
351 362 : break;
352 : }
353 : /* Skip non-ALPN extensions. */
354 2712 : p += length;
355 2712 : len -= length;
356 : }
357 372 : if (!found) { /* no ALPN extension found: set alpn->len = 0 and alpn->data = p. */
358 10 : quic_data(alpn, p, 0);
359 10 : return 0;
360 : }
361 :
362 : /* Parse ALPN protocols list length (2 bytes). */
363 362 : if (!quic_get_int(&p, &len, &length, 2) || length > (u64)len)
364 : return err;
365 362 : quic_data(alpn, p, length); /* Store ALPN protocols list in alpn->data. */
366 362 : len = length;
367 800 : while (len) { /* Validate ALPN protocols list format. */
368 438 : if (!quic_get_int(&p, &len, &length, 1) || length > (u64)len) {
369 : /* Malformed ALPN entry: set alpn->len = 0 and alpn->data = NULL. */
370 0 : quic_data(alpn, NULL, 0);
371 0 : return err;
372 : }
373 438 : len -= length;
374 438 : p += length;
375 : }
376 362 : pr_debug("%s: alpn_len: %d\n", __func__, alpn->len);
377 : return 0;
378 : }
379 :
380 : /* Parse ALPN from a QUIC Initial packet.
381 : *
382 : * This function processes a QUIC Initial packet to extract the ALPN from the TLS ClientHello
383 : * message inside the QUIC CRYPTO frame. It verifies packet type, version compatibility,
384 : * decrypts the packet payload, and locates the CRYPTO frame to parse the TLS ClientHello.
385 : * Finally, it calls quic_packet_get_alpn() to extract the ALPN extension data.
386 : *
387 : * Return: 0 on success or no ALPN found, a negative error code on failed parsing.
388 : */
389 747 : static int quic_packet_parse_alpn(struct sk_buff *skb, struct quic_data *alpn)
390 : {
391 747 : struct quic_skb_cb *cb = QUIC_SKB_CB(skb);
392 747 : struct net *net = dev_net(skb->dev);
393 747 : struct quic_net *qn = quic_net(net);
394 747 : u8 *p = skb->data, *data, type;
395 747 : struct quic_conn_id dcid, scid;
396 747 : u32 len = skb->len, version;
397 747 : struct quic_crypto *crypto;
398 747 : struct quic_data token;
399 747 : u64 offset, length;
400 747 : int err = -EINVAL;
401 :
402 747 : if (quic_packet_get_version_and_connid(&dcid, &scid, &version, &p, &len))
403 : return -EINVAL;
404 1494 : if (!quic_packet_compatible_versions(version))
405 : return 0;
406 : /* Only parse Initial packets. */
407 517 : type = quic_packet_version_get_type(version, quic_hshdr(skb)->type);
408 517 : if (type != QUIC_PACKET_INITIAL)
409 : return 0;
410 478 : if (quic_packet_get_token(&token, &p, &len))
411 : return -EINVAL;
412 478 : if (!quic_get_var(&p, &len, &length) || length > (u64)len)
413 : return err;
414 478 : cb->length = (u16)length;
415 : /* Copy skb data for restoring in case of decrypt failure. */
416 478 : data = kmemdup(skb->data, skb->len, GFP_ATOMIC);
417 478 : if (!data)
418 : return -ENOMEM;
419 :
420 478 : spin_lock(&qn->lock);
421 : /* Install initial keys for packet decryption to crypto. */
422 478 : crypto = &quic_net(net)->crypto;
423 478 : err = quic_crypto_initial_keys_install(crypto, &dcid, version, 1);
424 478 : if (err) {
425 0 : spin_unlock(&qn->lock);
426 0 : goto out;
427 : }
428 478 : cb->number_offset = (u16)(p - skb->data);
429 478 : err = quic_crypto_decrypt(crypto, skb);
430 478 : if (err) {
431 4 : spin_unlock(&qn->lock);
432 4 : QUIC_INC_STATS(net, QUIC_MIB_PKT_DECDROP);
433 : /* Restore original data on decrypt failure. */
434 8 : memcpy(skb->data, data, skb->len);
435 4 : goto out;
436 : }
437 474 : spin_unlock(&qn->lock);
438 :
439 474 : QUIC_INC_STATS(net, QUIC_MIB_PKT_DECFASTPATHS);
440 474 : cb->resume = 1; /* Mark this packet as already decrypted. */
441 :
442 : /* Find the QUIC CRYPTO frame. */
443 474 : p += cb->number_len;
444 474 : len = cb->length - cb->number_len - QUIC_TAG_LEN;
445 474 : for (; len && !(*p); p++, len--) /* Skip the padding frame. */
446 : ;
447 474 : if (!len-- || *p++ != QUIC_FRAME_CRYPTO)
448 0 : goto out;
449 474 : if (!quic_get_var(&p, &len, &offset) || offset)
450 56 : goto out;
451 418 : if (!quic_get_var(&p, &len, &length) || length > (u64)len)
452 0 : goto out;
453 :
454 : /* Parse the TLS CLIENT_HELLO message. */
455 418 : err = quic_packet_get_alpn(alpn, p, length);
456 :
457 478 : out:
458 478 : kfree(data);
459 478 : return err;
460 : }
461 :
462 : /* Extract the Destination Connection ID (DCID) from a QUIC Long header packet. */
463 3909 : int quic_packet_get_dcid(struct quic_conn_id *dcid, struct sk_buff *skb)
464 : {
465 3909 : u32 plen = skb->len;
466 3909 : u8 *p = skb->data;
467 3909 : u64 len;
468 :
469 3909 : if (plen < QUIC_HLEN + QUIC_VERSION_LEN)
470 : return -EINVAL;
471 3909 : plen -= (QUIC_HLEN + QUIC_VERSION_LEN);
472 3909 : p += (QUIC_HLEN + QUIC_VERSION_LEN);
473 :
474 3909 : if (!quic_get_int(&p, &plen, &len, 1) ||
475 3909 : len > plen || len > QUIC_CONN_ID_MAX_LEN)
476 : return -EINVAL;
477 3909 : quic_conn_id_update(dcid, p, len);
478 3909 : return 0;
479 : }
480 :
481 : /* Determine the QUIC socket associated with an incoming packet. */
482 6573280 : static struct sock *quic_packet_get_sock(struct sk_buff *skb)
483 : {
484 6573280 : struct quic_skb_cb *cb = QUIC_SKB_CB(skb);
485 6573280 : struct net *net = dev_net(skb->dev);
486 6573280 : struct quic_conn_id dcid, *conn_id;
487 6573280 : union quic_addr daddr, saddr;
488 6573280 : struct quic_data alpns = {};
489 6573280 : struct sock *sk = NULL;
490 :
491 6573280 : if (skb->len < QUIC_HLEN)
492 : return NULL;
493 :
494 6573280 : if (!quic_hdr(skb)->form) { /* Short header path. */
495 6569856 : if (skb->len < QUIC_HLEN + QUIC_CONN_ID_DEF_LEN)
496 : return NULL;
497 : /* Fast path: look up QUIC connection by fixed-length DCID
498 : * (Currently, only source CIDs of size QUIC_CONN_ID_DEF_LEN are used).
499 : */
500 6569856 : conn_id = quic_conn_id_lookup(net, skb->data + QUIC_HLEN,
501 : QUIC_CONN_ID_DEF_LEN);
502 6570930 : if (conn_id) {
503 6570577 : cb->seqno = quic_conn_id_number(conn_id);
504 6570577 : return quic_conn_id_sk(conn_id); /* Return associated socket. */
505 : }
506 :
507 : /* Fallback: listener socket lookup
508 : * (May be used to send a stateless reset from a listen socket).
509 : */
510 353 : quic_get_msg_addrs(&daddr, &saddr, skb);
511 353 : sk = quic_listen_sock_lookup(skb, &daddr, &saddr, &alpns);
512 353 : if (sk)
513 : return sk;
514 : /* Final fallback: address-based connection lookup
515 : * (May be used to receive a stateless reset).
516 : */
517 158 : return quic_sock_lookup(skb, &daddr, &saddr, NULL);
518 : }
519 :
520 : /* Long header path. */
521 3424 : if (quic_packet_get_dcid(&dcid, skb))
522 : return NULL;
523 : /* Fast path: look up QUIC connection by parsed DCID. */
524 3424 : conn_id = quic_conn_id_lookup(net, dcid.data, dcid.len);
525 3424 : if (conn_id) {
526 2528 : cb->seqno = quic_conn_id_number(conn_id);
527 2528 : return quic_conn_id_sk(conn_id); /* Return associated socket. */
528 : }
529 :
530 : /* Fallback: address + DCID lookup
531 : * (May be used for 0-RTT or a follow-up Client Initial packet).
532 : */
533 896 : quic_get_msg_addrs(&daddr, &saddr, skb);
534 896 : sk = quic_sock_lookup(skb, &daddr, &saddr, &dcid);
535 896 : if (sk)
536 : return sk;
537 : /* Final fallback: listener socket lookup
538 : * (Used for receiving the first Client Initial packet).
539 : */
540 747 : if (quic_packet_parse_alpn(skb, &alpns))
541 : return NULL;
542 743 : return quic_listen_sock_lookup(skb, &daddr, &saddr, &alpns);
543 : }
544 :
545 : /* Entry point for processing received QUIC packets. */
546 6576345 : int quic_packet_rcv(struct sk_buff *skb, u8 err)
547 : {
548 6576345 : struct net *net = dev_net(skb->dev);
549 6576345 : struct sock *sk;
550 :
551 6576345 : if (unlikely(err))
552 3287 : return quic_packet_rcv_err(skb);
553 :
554 6573058 : skb_pull(skb, skb_transport_offset(skb));
555 :
556 : /* Look up socket from socket or connection IDs hash tables. */
557 6573137 : sk = quic_packet_get_sock(skb);
558 6574351 : if (!sk)
559 162 : goto err;
560 :
561 6574189 : bh_lock_sock(sk);
562 6574333 : if (sock_owned_by_user(sk)) {
563 : /* Socket is busy (owned by user context): queue to backlog. */
564 1344439 : if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
565 0 : QUIC_INC_STATS(net, QUIC_MIB_PKT_RCVDROP);
566 0 : bh_unlock_sock(sk);
567 0 : goto err;
568 : }
569 1344430 : QUIC_INC_STATS(net, QUIC_MIB_PKT_RCVBACKLOGS);
570 : } else {
571 : /* Socket not busy: process immediately. */
572 5229894 : QUIC_INC_STATS(net, QUIC_MIB_PKT_RCVFASTPATHS);
573 5229894 : sk->sk_backlog_rcv(sk, skb); /* quic_packet_process(). */
574 : }
575 6574219 : bh_unlock_sock(sk);
576 6574219 : return 0;
577 :
578 162 : err:
579 162 : kfree_skb(skb);
580 162 : return -EINVAL;
581 : }
582 :
583 : /* rfc9000#section-17.2.5:
584 : *
585 : * Retry Packet {
586 : * Header Form (1) = 1,
587 : * Fixed Bit (1) = 1,
588 : * Long Packet Type (2) = 3,
589 : * Unused (4),
590 : * Version (32),
591 : * Destination Connection ID Length (8),
592 : * Destination Connection ID (0..160),
593 : * Source Connection ID Length (8),
594 : * Source Connection ID (0..160),
595 : * Retry Token (..),
596 : * Retry Integrity Tag (128),
597 : * }
598 : *
599 : * A Retry packet uses a long packet header with a type value of 0x03. It carries an address
600 : * validation token created by the server. It is used by a server that wishes to perform a retry.
601 : */
602 5 : static int quic_packet_retry_create(struct sock *sk)
603 : {
604 5 : struct quic_crypto *crypto = quic_crypto(sk, QUIC_CRYPTO_INITIAL);
605 5 : u8 *p, buf[QUIC_FRAME_BUF_LARGE], tag[QUIC_TAG_LEN];
606 5 : struct quic_packet *packet = quic_packet(sk);
607 5 : union quic_addr *da = &packet->daddr;
608 5 : struct quic_conn_id conn_id;
609 5 : struct quichshdr *hdr;
610 5 : struct sk_buff *skb;
611 5 : u32 len, tlen, hlen;
612 5 : struct flowi fl;
613 :
614 : /* Clear routing cache and compute flow route. */
615 5 : __sk_dst_reset(sk);
616 5 : if (quic_flow_route(sk, da, &packet->saddr, &fl))
617 : return -EINVAL;
618 :
619 : /* Write token flags into buffer: QUIC_TOKEN_FLAG_RETRY means retry token. */
620 5 : quic_put_int(buf, QUIC_TOKEN_FLAG_RETRY, 1);
621 : /* Generate retry token using client's address and DCID from client initial packet. */
622 5 : if (quic_crypto_generate_token(crypto, da, sizeof(*da), &packet->dcid, buf, &tlen))
623 : return -EINVAL;
624 :
625 5 : quic_conn_id_generate(&conn_id); /* Generate new SCID for the Retry packet. */
626 : /* Compute total packet length: header + token + integrity tag. */
627 5 : len = QUIC_LONG_HLEN(&conn_id, &packet->scid) + tlen + QUIC_TAG_LEN;
628 5 : hlen = quic_encap_len(da) + MAX_HEADER;
629 5 : skb = alloc_skb(hlen + len, GFP_ATOMIC);
630 5 : if (!skb)
631 : return -ENOMEM;
632 5 : skb_reserve(skb, (int)(hlen + len));
633 :
634 : /* Build Long Packet header. */
635 5 : hdr = skb_push(skb, len);
636 5 : hdr->form = 1;
637 5 : hdr->fixed = !quic_outq(sk)->grease_quic_bit;
638 5 : hdr->type = quic_packet_version_put_type(packet->version, QUIC_PACKET_RETRY);
639 5 : hdr->reserved = 0;
640 5 : hdr->pnl = 0;
641 5 : skb_reset_transport_header(skb);
642 :
643 : /* Write the QUIC version. */
644 5 : p = (u8 *)hdr + QUIC_HLEN;
645 5 : p = quic_put_int(p, packet->version, QUIC_VERSION_LEN);
646 : /* Write Destination Connection ID. */
647 5 : p = quic_put_int(p, packet->scid.len, 1);
648 5 : p = quic_put_data(p, packet->scid.data, packet->scid.len);
649 : /* Write Source Connection ID. */
650 5 : p = quic_put_int(p, conn_id.len, 1);
651 5 : p = quic_put_data(p, conn_id.data, conn_id.len);
652 : /* Write Retry Token. */
653 5 : p = quic_put_data(p, buf, tlen);
654 : /* Generate and write Retry Integrity Tag.*/
655 5 : if (quic_crypto_get_retry_tag(crypto, skb, &packet->dcid, packet->version, tag)) {
656 0 : kfree_skb(skb);
657 0 : return -EINVAL;
658 : }
659 5 : quic_put_data(p, tag, QUIC_TAG_LEN);
660 :
661 : /* Transmit the Retry packet. */
662 5 : quic_lower_xmit(sk, skb, da, &fl);
663 5 : return 0;
664 : }
665 :
666 : /* rfc9000#section-17.2.1:
667 : *
668 : * Version Negotiation Packet {
669 : * Header Form (1) = 1,
670 : * Unused (7),
671 : * Version (32) = 0,
672 : * Destination Connection ID Length (8),
673 : * Destination Connection ID (0..2040),
674 : * Source Connection ID Length (8),
675 : * Source Connection ID (0..2040),
676 : * Supported Version (32) ...,
677 : * }
678 : *
679 : * A Version Negotiation packet is inherently not version specific. Upon receipt by a client,
680 : * it will be identified as a Version Negotiation packet based on the Version field having a
681 : * value of 0.
682 : *
683 : * The Version Negotiation packet is a response to a client packet that contains a version that
684 : * is not supported by the server. It is only sent by servers.
685 : */
686 230 : static int quic_packet_version_create(struct sock *sk)
687 : {
688 230 : struct quic_packet *packet = quic_packet(sk);
689 230 : union quic_addr *da = &packet->daddr;
690 230 : struct quichshdr *hdr;
691 230 : struct sk_buff *skb;
692 230 : u32 len, hlen, i;
693 230 : struct flowi fl;
694 230 : u8 *p;
695 :
696 : /* Clear routing cache and compute flow route. */
697 230 : __sk_dst_reset(sk);
698 230 : if (quic_flow_route(sk, da, &packet->saddr, &fl))
699 : return -EINVAL;
700 :
701 : /* Compute packet length: header + supported version list. */
702 230 : len = QUIC_LONG_HLEN(&packet->dcid, &packet->scid) + QUIC_VERSION_LEN * QUIC_VERSION_NUM;
703 230 : hlen = quic_encap_len(da) + MAX_HEADER;
704 230 : skb = alloc_skb(hlen + len, GFP_ATOMIC);
705 230 : if (!skb)
706 : return -ENOMEM;
707 230 : skb_reserve(skb, (int)(hlen + len));
708 :
709 : /* Build Long Packet header. */
710 230 : hdr = skb_push(skb, len);
711 230 : hdr->form = 1;
712 230 : hdr->fixed = !quic_outq(sk)->grease_quic_bit;
713 230 : hdr->type = 0;
714 230 : hdr->reserved = 0;
715 230 : hdr->pnl = 0;
716 230 : skb_reset_transport_header(skb);
717 :
718 : /* Write zero version. */
719 230 : p = (u8 *)hdr + QUIC_HLEN;
720 230 : p = quic_put_int(p, 0, QUIC_VERSION_LEN);
721 : /* Write Destination Connection ID. */
722 230 : p = quic_put_int(p, packet->scid.len, 1);
723 230 : p = quic_put_data(p, packet->scid.data, packet->scid.len);
724 : /* Write Source Connection ID. */
725 230 : p = quic_put_int(p, packet->dcid.len, 1);
726 230 : p = quic_put_data(p, packet->dcid.data, packet->dcid.len);
727 :
728 : /* Write Supported Versions. */
729 920 : for (i = 0; i < QUIC_VERSION_NUM; i++)
730 460 : p = quic_put_int(p, quic_versions[i][0], QUIC_VERSION_LEN);
731 :
732 : /* Transmit the Version Negotiation packet. */
733 230 : quic_lower_xmit(sk, skb, da, &fl);
734 230 : return 0;
735 : }
736 :
737 : #define QUIC_STATELESS_RESET_DEF_LEN 64
738 : #define QUIC_STATELESS_RESET_MIN_LEN (QUIC_HLEN + 5 + QUIC_CONN_ID_TOKEN_LEN)
739 :
740 : /* rfc9000#section-10.3:
741 : *
742 : * Stateless Reset {
743 : * Fixed Bits (2) = 1,
744 : * Unpredictable Bits (38..),
745 : * Stateless Reset Token (128),
746 : * }
747 : *
748 : * A stateless reset is provided as an option of last resort for an endpoint that does not have
749 : * access to the state of a connection. A crash or outage might result in peers continuing to
750 : * send data to an endpoint that is unable to properly continue the connection. An endpoint MAY
751 : * send a Stateless Reset in response to receiving a packet that it cannot associate with an
752 : * active connection.
753 : */
754 198 : static int quic_packet_stateless_reset_create(struct sock *sk)
755 : {
756 198 : struct quic_crypto *crypto = quic_crypto(sk, QUIC_CRYPTO_INITIAL);
757 198 : struct quic_packet *packet = quic_packet(sk);
758 198 : union quic_addr *da = &packet->daddr;
759 198 : u8 *p, token[QUIC_CONN_ID_TOKEN_LEN];
760 198 : struct sk_buff *skb;
761 198 : struct flowi fl;
762 198 : u32 len, hlen;
763 :
764 : /* Clear routing cache and compute flow route. */
765 198 : __sk_dst_reset(sk);
766 198 : if (quic_flow_route(sk, da, &packet->saddr, &fl))
767 : return -EINVAL;
768 :
769 : /* Generate stateless reset token from DCID in the packet received. */
770 198 : if (quic_crypto_generate_stateless_reset_token(crypto, packet->dcid.data,
771 198 : packet->dcid.len, token,
772 : QUIC_CONN_ID_TOKEN_LEN))
773 : return -EINVAL;
774 :
775 198 : len = QUIC_STATELESS_RESET_DEF_LEN;
776 198 : hlen = quic_encap_len(da) + MAX_HEADER;
777 198 : skb = alloc_skb(hlen + len, GFP_ATOMIC);
778 198 : if (!skb)
779 : return -ENOMEM;
780 198 : skb_reserve(skb, (int)(hlen + len));
781 :
782 198 : p = skb_push(skb, len);
783 : /* Write Unpredictable Bits. */
784 198 : get_random_bytes(p, len);
785 198 : skb_reset_transport_header(skb);
786 :
787 : /* Build Short Packet header. */
788 198 : quic_hdr(skb)->form = 0;
789 198 : quic_hdr(skb)->fixed = 1;
790 :
791 : /* Write end of packet with stateless reset token. */
792 198 : p += (len - QUIC_CONN_ID_TOKEN_LEN);
793 198 : quic_put_data(p, token, QUIC_CONN_ID_TOKEN_LEN);
794 :
795 : /* Transmit the Stateless Reset packet. */
796 198 : quic_lower_xmit(sk, skb, da, &fl);
797 198 : return 0;
798 : }
799 :
800 : /* Generate and send a CONNECTION_CLOSE frame on a listening socket in response to an invalid
801 : * client Initial packet. No accept socket exists yet to handle it.
802 : */
803 0 : static int quic_packet_refuse_close_create(struct sock *sk, u32 errcode)
804 : {
805 0 : struct quic_conn_id_set *id_set = quic_source(sk);
806 0 : struct quic_path_group *paths = quic_paths(sk);
807 0 : struct quic_packet *packet = quic_packet(sk);
808 0 : u8 level = QUIC_CRYPTO_INITIAL;
809 0 : struct quic_conn_id *active;
810 :
811 : /* Use the client's DCID as our SCID when responding. */
812 0 : active = quic_conn_id_active(id_set);
813 0 : quic_conn_id_update(active, packet->dcid.data, packet->dcid.len);
814 : /* Use path[1] for sending; path[0] remains for listening only. */
815 0 : quic_path_set_saddr(paths, 1, &packet->saddr);
816 0 : quic_path_set_daddr(paths, 1, &packet->daddr);
817 :
818 : /* Reinstall Initial keys for encryption with the client's version. */
819 0 : if (quic_packet_version_change(sk, active, packet->version))
820 : return -EINVAL;
821 : /* Set the errcode used in CLOSE frame and Transmit it at Initial level. */
822 0 : quic_outq(sk)->close_errcode = errcode;
823 0 : quic_outq_transmit_frame(sk, QUIC_FRAME_CONNECTION_CLOSE, &level, 1, false);
824 0 : return 0;
825 : }
826 :
827 : /* Process an incoming packet on a listening QUIC socket.
828 : *
829 : * Depending on the packet type and state, this may involve creating a request socket for a new
830 : * connection, responding with a Stateless Reset for unexpected Handshake or 1-RTT packets,
831 : * issuing a Retry packet for address validation when needed, or sending a Version Negotiation
832 : * packet if the client's QUIC version is unsupported.
833 : */
834 938 : static int quic_packet_listen_process(struct sock *sk, struct sk_buff *skb)
835 : {
836 938 : struct quic_packet *packet = quic_packet(sk);
837 938 : u32 version, errcode, len = skb->len;
838 938 : u8 *p = skb->data, type, retry = 0;
839 938 : struct net *net = sock_net(sk);
840 938 : struct quic_crypto *crypto;
841 938 : struct quic_conn_id odcid;
842 938 : struct quic_data token;
843 938 : int err = 0;
844 :
845 938 : if (!quic_hshdr(skb)->form) {
846 : /* rfc9000#section-10.3:
847 : *
848 : * An endpoint MAY send a Stateless Reset in response to receiving a packet
849 : * that it cannot associate with an active connection.
850 : */
851 195 : if (len < QUIC_HLEN + QUIC_CONN_ID_DEF_LEN) {
852 0 : QUIC_INC_STATS(net, QUIC_MIB_PKT_INVHDRDROP);
853 0 : err = -EINVAL;
854 0 : kfree_skb(skb);
855 0 : goto out;
856 : }
857 : /* We currently only issue Connection ID with size QUIC_CONN_ID_DEF_LEN. */
858 195 : quic_conn_id_update(&packet->dcid, (u8 *)quic_hdr(skb) + QUIC_HLEN,
859 : QUIC_CONN_ID_DEF_LEN);
860 : /* Send a Stateless Reset for this 1-RTT packet. */
861 195 : err = quic_packet_stateless_reset_create(sk);
862 195 : consume_skb(skb);
863 195 : goto out;
864 : }
865 :
866 : /* Read VERSION, Destination Connection ID and Scource Connection ID. */
867 743 : if (quic_packet_get_version_and_connid(&packet->dcid, &packet->scid, &version, &p, &len)) {
868 0 : QUIC_INC_STATS(net, QUIC_MIB_PKT_INVHDRDROP);
869 0 : err = -EINVAL;
870 0 : kfree_skb(skb);
871 0 : goto out;
872 : }
873 :
874 : /* Read Destination address (packet->saddr) and Source address (packet->daddr). */
875 743 : quic_get_msg_addrs(&packet->saddr, &packet->daddr, skb);
876 743 : if (quic_request_sock_exists(sk))
877 66 : goto enqueue; /* If the request sock already exists, queue the packet directly. */
878 :
879 677 : if (quic_accept_sock_exists(sk, skb))
880 19 : goto out; /* Skip if the packet has been handled by the matching accept socket. */
881 :
882 1316 : if (!quic_packet_compatible_versions(version)) {
883 : /* rfc9000#section-6.1:
884 : *
885 : * If the version selected by the client is not acceptable to the server, the
886 : * server responds with a Version Negotiation packet. This includes a list of
887 : * versions that the server will accept.
888 : */
889 230 : err = quic_packet_version_create(sk);
890 230 : consume_skb(skb);
891 230 : goto out;
892 : }
893 :
894 428 : type = quic_packet_version_get_type(version, quic_hshdr(skb)->type); /* Read Packet Type. */
895 428 : if (type != QUIC_PACKET_INITIAL) { /* Send a Stateless Reset for this Handshake packet. */
896 3 : err = quic_packet_stateless_reset_create(sk);
897 3 : consume_skb(skb);
898 3 : goto out;
899 : }
900 :
901 425 : if (quic_packet_get_token(&token, &p, &len)) { /* Read Token from this Initial packet. */
902 0 : QUIC_INC_STATS(net, QUIC_MIB_PKT_INVHDRDROP);
903 0 : err = -EINVAL;
904 0 : kfree_skb(skb);
905 0 : goto out;
906 : }
907 425 : packet->version = version;
908 : /* Save original DCID for future token validation or Retry logic. */
909 425 : quic_conn_id_update(&odcid, packet->dcid.data, packet->dcid.len);
910 : /* If configured to validate client addresses, handle token logic. */
911 425 : if (quic_config(sk)->validate_peer_address) {
912 9 : if (!token.len) {
913 : /* rfc9000#section-8.1.2:
914 : *
915 : * Upon receiving the client's Initial packet, the server can request
916 : * address validation by sending a Retry packet containing a token.
917 : */
918 5 : err = quic_packet_retry_create(sk);
919 5 : consume_skb(skb);
920 5 : goto out;
921 : }
922 : /* Verify Token. */
923 4 : crypto = quic_crypto(sk, QUIC_CRYPTO_INITIAL);
924 4 : err = quic_crypto_verify_token(crypto, &packet->daddr, sizeof(packet->daddr),
925 : &odcid, token.data, token.len);
926 4 : if (err) {
927 : /* rfc9000#section-8.1.3:
928 : *
929 : * If a server receives a client Initial that contains an invalid Retry
930 : * token but is otherwise valid, it knows the client will not accept
931 : * another Retry token. The server SHOULD immediately clos the
932 : * connection with an INVALID_TOKEN error.
933 : */
934 0 : errcode = QUIC_TRANSPORT_ERROR_INVALID_TOKEN;
935 0 : err = quic_packet_refuse_close_create(sk, errcode);
936 0 : consume_skb(skb);
937 0 : goto out;
938 : }
939 : /* Distinguish token source: Retry packet or NEW_TOKEN frame. */
940 4 : retry = *(u8 *)token.data == QUIC_TOKEN_FLAG_RETRY;
941 : }
942 :
943 : /* Add request sock for this new QUIC connection. */
944 420 : err = quic_request_sock_enqueue(sk, &odcid, retry);
945 420 : if (err) {
946 : /* rfc9000#section-5.2.2:
947 : *
948 : * If a server refuses to accept a new connection, it SHOULD send an Initial
949 : * packet containing a CONNECTION_CLOSE frame with error code CONNECTION_REFUSED.
950 : */
951 0 : errcode = QUIC_TRANSPORT_ERROR_CONNECTION_REFUSED;
952 0 : err = quic_packet_refuse_close_create(sk, errcode);
953 0 : consume_skb(skb);
954 0 : goto out;
955 : }
956 420 : enqueue:
957 : /* Check if listen socket’s receive buffer has space to hold the packet. */
958 486 : if (atomic_read(&sk->sk_rmem_alloc) + skb->len > (u32)sk->sk_rcvbuf) {
959 0 : err = -ENOBUFS;
960 0 : kfree_skb(skb);
961 0 : goto out;
962 : }
963 :
964 : /* Append to inqueue backlog list of listen socket and notify any blocked accept() calls. */
965 486 : skb_set_owner_r(skb, sk);
966 486 : quic_inq_backlog_tail(sk, skb);
967 486 : sk->sk_data_ready(sk);
968 938 : out:
969 938 : return err;
970 : }
971 :
972 0 : static int quic_packet_stateless_reset_process(struct sock *sk, struct sk_buff *skb)
973 : {
974 0 : struct quic_conn_id_set *id_set = quic_dest(sk);
975 0 : struct quic_connection_close close = {};
976 0 : u8 *token;
977 :
978 0 : if (skb->len < QUIC_STATELESS_RESET_MIN_LEN)
979 : return -EINVAL;
980 :
981 : /* rfc9000#section-10.3.1:
982 : *
983 : * An endpoint detects a potential Stateless Reset using the trailing 16 bytes of the UDP
984 : * datagram. An endpoint remembers all stateless reset tokens associated with the
985 : * connection IDs and remote addresses for datagrams it has recently sent. This includes
986 : * Stateless Reset Token field values from NEW_CONNECTION_ID frames and the server's
987 : * transport parameters but excludes stateless reset tokens associated with connection IDs
988 : * that are either unused or retired. The endpoint identifies a received datagram as a
989 : * Stateless Reset by comparing the last 16 bytes of the datagram with all stateless reset
990 : * tokens associated with the remote address on which the datagram was received.
991 : *
992 : */
993 0 : token = skb->data + skb->len - QUIC_CONN_ID_TOKEN_LEN;
994 0 : if (!quic_conn_id_token_exists(id_set, token))
995 : return -EINVAL;
996 :
997 : /* rfc9000#section-10.3:
998 : *
999 : * To support this process, an endpoint issues a stateless reset token, which is a 16-byte
1000 : * value that is hard to guess. If the peer subsequently receives a Stateless Reset, which
1001 : * is a UDP datagram that ends in that stateless reset token, the peer will immediately
1002 : * end the connection.
1003 : */
1004 0 : close.errcode = QUIC_TRANSPORT_ERROR_CRYPTO;
1005 0 : quic_inq_event_recv(sk, QUIC_EVENT_CONNECTION_CLOSE, &close);
1006 0 : quic_set_state(sk, QUIC_SS_CLOSED);
1007 0 : consume_skb(skb);
1008 0 : pr_debug("%s: peer reset\n", __func__);
1009 : return 0;
1010 : }
1011 :
1012 6 : static int quic_packet_retry_process(struct sock *sk, struct sk_buff *skb)
1013 : {
1014 6 : struct quic_crypto *crypto = quic_crypto(sk, QUIC_CRYPTO_INITIAL);
1015 6 : struct quic_path_group *paths = quic_paths(sk);
1016 6 : struct quic_packet *packet = quic_packet(sk);
1017 6 : struct quic_conn_id *active;
1018 6 : u8 *p, tag[QUIC_TAG_LEN];
1019 6 : u32 hlen, len, version;
1020 :
1021 6 : hlen = QUIC_LONG_HLEN(&packet->dcid, &packet->scid);
1022 6 : len = skb->len - hlen;
1023 6 : if (len < QUIC_TAG_LEN)
1024 0 : goto err;
1025 6 : p = skb->data + hlen;
1026 6 : version = packet->version;
1027 : /* rfc9000#section-17.2.5.2:
1028 : *
1029 : * Clients MUST discard Retry packets that have a Retry Integrity Tag that cannot be
1030 : * validated.
1031 : */
1032 6 : if (quic_crypto_get_retry_tag(crypto, skb, &paths->orig_dcid, version, tag) ||
1033 12 : memcmp(tag, p + len - QUIC_TAG_LEN, QUIC_TAG_LEN))
1034 0 : goto err;
1035 : /* Save the Retry token into quic_token(). */
1036 6 : if (quic_data_dup(quic_token(sk), p, len - QUIC_TAG_LEN))
1037 0 : goto err;
1038 : /* Update crypto keys using the new DCID (similar to version negotiation). */
1039 6 : if (quic_packet_version_change(sk, &packet->scid, version))
1040 0 : goto err;
1041 : /* rfc9000#section-17.2.5.2:
1042 : *
1043 : * A client sets the Destination Connection ID field of this Initial packet to the value
1044 : * from the Source Connection ID field in the Retry packet.
1045 : */
1046 6 : active = quic_conn_id_active(quic_dest(sk));
1047 6 : quic_conn_id_update(active, packet->scid.data, packet->scid.len);
1048 : /* rfc9000#section-7.3:
1049 : *
1050 : * If it sends a Retry packet, a server also includes the Source Connection ID field from
1051 : * the Retry packet in the retry_source_connection_id transport parameter.
1052 : *
1053 : * (Save the connection ID for authenticating this transport parameter later).
1054 : */
1055 6 : paths->retry = 1;
1056 6 : paths->retry_dcid = *active;
1057 : /* rfc9000#section-17.2.5.2:
1058 : *
1059 : * The client responds to a Retry packet with an Initial packet that includes the provided
1060 : * Retry token to continue connection establishment.
1061 : *
1062 : * (Retransmit the CRYPTO frame in an initial packet with token save in quic_token()).
1063 : */
1064 6 : quic_outq_retransmit_mark(sk, QUIC_CRYPTO_INITIAL, 1);
1065 6 : quic_outq_update_loss_timer(sk);
1066 6 : quic_outq_transmit(sk);
1067 :
1068 6 : consume_skb(skb);
1069 6 : return 0;
1070 0 : err:
1071 0 : kfree_skb(skb);
1072 0 : return -EINVAL;
1073 : }
1074 :
1075 0 : static int quic_packet_version_process(struct sock *sk, struct sk_buff *skb)
1076 : {
1077 0 : struct quic_packet *packet = quic_packet(sk);
1078 0 : u64 version, best = 0;
1079 0 : u32 hlen, len;
1080 0 : u8 *p;
1081 :
1082 0 : hlen = QUIC_LONG_HLEN(&packet->dcid, &packet->scid);
1083 0 : len = skb->len - hlen;
1084 0 : if (len < QUIC_VERSION_LEN)
1085 0 : goto err;
1086 :
1087 : /* rfc9368#section-2.1:
1088 : *
1089 : * Upon receiving the Version Negotiation packet, the client SHALL search for a version it
1090 : * supports in the list provided by the server.
1091 : */
1092 0 : p = skb->data + hlen;
1093 0 : while (len >= QUIC_VERSION_LEN) {
1094 : /* Parse all versions and choose the highest one. */
1095 0 : quic_get_int(&p, &len, &version, QUIC_VERSION_LEN);
1096 0 : if (quic_packet_compatible_versions(version) && best < version)
1097 : best = version;
1098 : }
1099 0 : if (best) {
1100 : /* Found one and update crypto keys using the new version. */
1101 0 : if (quic_packet_version_change(sk, &packet->scid, best))
1102 0 : goto err;
1103 : /* Retransmit the CRYPTO frame in an initial packet with new version. */
1104 0 : quic_outq_retransmit_mark(sk, QUIC_CRYPTO_INITIAL, 1);
1105 0 : quic_outq_update_loss_timer(sk);
1106 0 : quic_outq_transmit(sk);
1107 : }
1108 :
1109 0 : consume_skb(skb);
1110 0 : return 0;
1111 0 : err:
1112 0 : kfree_skb(skb);
1113 0 : return -EINVAL;
1114 : }
1115 :
1116 0 : static void quic_packet_decrypt_done(struct sk_buff *skb, int err)
1117 : {
1118 0 : if (err) {
1119 0 : QUIC_INC_STATS(sock_net(skb->sk), QUIC_MIB_PKT_DECDROP);
1120 0 : kfree_skb(skb);
1121 0 : pr_debug("%s: err: %d\n", __func__, err);
1122 0 : return;
1123 : }
1124 :
1125 : /* Decryption succeeded: queue the decrypted skb for asynchronous processing. */
1126 0 : quic_inq_decrypted_tail(skb->sk, skb);
1127 : }
1128 :
1129 : /* Process the header of an incoming long-header QUIC handshake packet. Parses the packet type
1130 : * and handles Version Negotiation and Retry if present. Sets packet->level to 0 if the packet
1131 : * is fully consumed.
1132 : */
1133 4576 : static int quic_packet_handshake_header_process(struct sock *sk, struct sk_buff *skb)
1134 : {
1135 4576 : u8 *p = (u8 *)quic_hshdr(skb), type = quic_hshdr(skb)->type;
1136 4576 : struct quic_packet *packet = quic_packet(sk);
1137 4576 : struct quic_skb_cb *cb = QUIC_SKB_CB(skb);
1138 4576 : u32 len = skb->len, version;
1139 4576 : struct quic_data token;
1140 4576 : u64 length;
1141 :
1142 4576 : quic_packet_reset(packet); /* Reset packet state to prepare for new packet parsing. */
1143 : /* Read VERSION, Destination Connection ID and Scource Connection ID. */
1144 4576 : if (quic_packet_get_version_and_connid(&packet->dcid, &packet->scid, &version, &p, &len))
1145 : return -EINVAL;
1146 4576 : if (!version) { /* version == 0 indicates this is a version negotiation packet. */
1147 0 : quic_packet_version_process(sk, skb);
1148 0 : packet->level = 0;
1149 0 : return 0;
1150 : }
1151 4576 : type = quic_packet_version_get_type(version, type); /* Read Packet Type. */
1152 4576 : if (version != packet->version) {
1153 : /* Version negotiation is already complete on this non-listen socket; skip the
1154 : * packet if the negotiated version is unsupported.
1155 : */
1156 28 : if (type != QUIC_PACKET_INITIAL || !quic_packet_compatible_versions(version))
1157 : return -EINVAL;
1158 : /* Update crypto keys for the new negotiated version. */
1159 14 : if (quic_packet_version_change(sk, &quic_paths(sk)->orig_dcid, version))
1160 : return -EINVAL;
1161 : }
1162 4576 : switch (type) {
1163 1755 : case QUIC_PACKET_INITIAL:
1164 1755 : if (quic_packet_get_token(&token, &p, &len)) /* Read Token. */
1165 : return -EINVAL;
1166 1755 : packet->level = QUIC_CRYPTO_INITIAL;
1167 1755 : if (!quic_is_serv(sk) && token.len) {
1168 : /* rfc9000#section-17.2.2:
1169 : *
1170 : * Initial packets sent by the server MUST set the Token Length field to
1171 : * 0; clients that receive an Initial packet with a non-zero Token Length
1172 : * field MUST either discard the packet or generate a connection error of
1173 : * type PROTOCOL_VIOLATION.
1174 : */
1175 0 : packet->errcode = QUIC_TRANSPORT_ERROR_PROTOCOL_VIOLATION;
1176 0 : return -EINVAL;
1177 : }
1178 : break;
1179 : case QUIC_PACKET_HANDSHAKE:
1180 2500 : if (!quic_crypto(sk, QUIC_CRYPTO_HANDSHAKE)->recv_ready) {
1181 : /* Queue to backlog until Handshake keys are ready. */
1182 544 : quic_inq_backlog_tail(sk, skb);
1183 544 : return 0;
1184 : }
1185 1956 : packet->level = QUIC_CRYPTO_HANDSHAKE;
1186 1956 : break;
1187 : case QUIC_PACKET_0RTT:
1188 315 : if (!quic_crypto(sk, QUIC_CRYPTO_EARLY)->recv_ready) {
1189 : /* Queue to backlog until 0-RTT keys are ready. */
1190 134 : quic_inq_backlog_tail(sk, skb);
1191 134 : return 0;
1192 : }
1193 181 : packet->level = QUIC_CRYPTO_EARLY;
1194 181 : break;
1195 6 : case QUIC_PACKET_RETRY:
1196 6 : quic_packet_retry_process(sk, skb); /* Handle Retry packet. */
1197 6 : packet->level = 0;
1198 6 : return 0;
1199 : default:
1200 : return -EINVAL;
1201 : }
1202 :
1203 3892 : if (!quic_get_var(&p, &len, &length) || length > (u64)len) /* Read Payload Length. */
1204 : return -EINVAL;
1205 3892 : cb->length = (u16)length;
1206 3892 : cb->number_offset = (u16)(p - skb->data);
1207 3892 : return 0;
1208 : }
1209 :
1210 : /* Process an incoming long-header QUIC packet during the handshake phase. This packet may be a
1211 : * coalesced packet, including multiple long headers and a trailing short header.
1212 : */
1213 3858 : static int quic_packet_handshake_process(struct sock *sk, struct sk_buff *skb)
1214 : {
1215 3858 : struct quic_path_group *paths = quic_paths(sk);
1216 3858 : struct quic_packet *packet = quic_packet(sk);
1217 3858 : struct quic_skb_cb *cb = QUIC_SKB_CB(skb);
1218 3858 : struct quic_inqueue *inq = quic_inq(sk);
1219 3858 : struct quic_cong *cong = quic_cong(sk);
1220 3858 : struct net *net = sock_net(sk);
1221 3858 : u8 is_serv = quic_is_serv(sk);
1222 3858 : struct quic_conn_id *conn_id;
1223 3858 : struct quic_frame frame = {};
1224 3858 : struct quic_crypto *crypto;
1225 3858 : struct quic_pnspace *space;
1226 3858 : struct udphdr *uh;
1227 3858 : int err = -EINVAL;
1228 :
1229 : /* Associate skb with sk to ensure sk is valid during async decryption completion. */
1230 3858 : WARN_ON(!skb_set_owner_sk_safe(skb, sk));
1231 3858 : sock_rps_save_rxhash(sk, skb);
1232 :
1233 : /* Loop to handle each QUIC packet in this coalesced packet. */
1234 7749 : while (skb->len > 0) {
1235 5594 : if (!quic_hshdr(skb)->form) { /* Short-header packet. */
1236 : /* If DCID doesn't match treat as padding, and increase anti-amplification
1237 : * credit if path isn't validated.
1238 : */
1239 1018 : conn_id = &packet->dcid;
1240 1018 : if (conn_id->len > skb->len - QUIC_HLEN ||
1241 2034 : memcmp(conn_id->data, skb->data + QUIC_HLEN, conn_id->len)) {
1242 306 : if (!paths->validated)
1243 177 : paths->ampl_rcvlen += skb->len;
1244 : break;
1245 : }
1246 : /* Handle short-header packet via quic_packet_app_process(). */
1247 712 : cb->number_offset = 0;
1248 712 : quic_packet_process(sk, skb);
1249 712 : skb = NULL;
1250 712 : break;
1251 : }
1252 : /* Parse long-header and handle Retry or Version Negotiation if present. */
1253 4576 : if (quic_packet_handshake_header_process(sk, skb)) {
1254 0 : QUIC_INC_STATS(net, QUIC_MIB_PKT_INVHDRDROP);
1255 0 : goto err;
1256 : }
1257 4576 : if (!packet->level) /* If already consumed (e.g., Retry), stop processing. */
1258 : return 0;
1259 :
1260 3892 : crypto = quic_crypto(sk, packet->level);
1261 3892 : space = quic_pnspace(sk, packet->level);
1262 :
1263 : /* Set highest received packet number for packet number decode during decryption. */
1264 3892 : cb->number_max = space->max_pn_seen;
1265 3892 : cb->crypto_done = quic_packet_decrypt_done;
1266 3892 : err = quic_crypto_decrypt(crypto, skb); /* Do packet decryption. */
1267 3892 : if (err) {
1268 0 : if (err == -EINPROGRESS) {
1269 0 : QUIC_INC_STATS(net, QUIC_MIB_PKT_DECBACKLOGS);
1270 0 : return err;
1271 : }
1272 0 : QUIC_INC_STATS(net, QUIC_MIB_PKT_DECDROP);
1273 0 : packet->errcode = cb->errcode;
1274 0 : goto err;
1275 : }
1276 3892 : if (!cb->resume) /* Already decrypted (e.g., via parse_alpn or async complete). */
1277 3423 : QUIC_INC_STATS(net, QUIC_MIB_PKT_DECFASTPATHS);
1278 3892 : if (quic_hshdr(skb)->reserved) {
1279 : /* rfc9000#section-17.2:
1280 : *
1281 : * An endpoint MUST treat receipt of a packet that has a non-zero value
1282 : * for these bits after removing both packet and header protection as a
1283 : * connection error of type PROTOCOL_VIOLATION.
1284 : */
1285 0 : QUIC_INC_STATS(net, QUIC_MIB_PKT_INVHDRDROP);
1286 0 : packet->errcode = QUIC_TRANSPORT_ERROR_PROTOCOL_VIOLATION;
1287 0 : goto err;
1288 : }
1289 :
1290 3892 : pr_debug("%s: recvd, num: %llu, level: %d, len: %d\n",
1291 : __func__, cb->number, packet->level, skb->len);
1292 :
1293 : /* Use packet arrival time as current time (may have been queued in backlog). */
1294 3892 : space->time = cb->time;
1295 3892 : cong->time = cb->time;
1296 3892 : err = quic_pnspace_check(space, cb->number);
1297 3892 : if (err) { /* Drop if packet number is outside ACK tracking range. */
1298 0 : if (err > 0) { /* Trigger an ACK if packet number was marked already. */
1299 0 : packet->ack_requested = 1;
1300 0 : goto next;
1301 : }
1302 0 : QUIC_INC_STATS(net, QUIC_MIB_PKT_INVNUMDROP);
1303 0 : err = -EINVAL;
1304 0 : goto err;
1305 : }
1306 :
1307 : /* Prepare a 'coalesced' frame for parsing and processing. */
1308 3892 : frame.data = skb->data + cb->number_offset + cb->number_len;
1309 3892 : frame.len = cb->length - cb->number_len - packet->taglen[1];
1310 3892 : frame.level = packet->level;
1311 3892 : frame.skb = skb;
1312 3892 : err = quic_frame_process(sk, &frame); /* Process this 'coalesced' frame. */
1313 3892 : if (err) {
1314 0 : QUIC_INC_STATS(net, QUIC_MIB_PKT_INVFRMDROP);
1315 0 : goto err;
1316 : }
1317 : /* Mark packet number as received for ACK generation. */
1318 3892 : err = quic_pnspace_mark(space, cb->number);
1319 3892 : if (err)
1320 0 : goto err;
1321 :
1322 : /* rfc9000#section-13.4.1:
1323 : *
1324 : * On receiving an IP packet with an ECT(0), ECT(1), or ECN-CE codepoint, an
1325 : * ECN-enabled endpoint accesses the ECN field and increases the corresponding
1326 : * ECT(0), ECT(1), or ECN-CE count. These ECN counts are included in subsequent
1327 : * ACK frames.
1328 : */
1329 3892 : quic_pnspace_inc_ecn_count(space, quic_get_msg_ecn(skb));
1330 :
1331 3892 : if (packet->has_sack) {
1332 : /* rfc9002#section-6:
1333 : *
1334 : * QUIC senders use acknowledgments to detect lost packets and a PTO to
1335 : * ensure acknowledgments are received.
1336 : */
1337 1532 : quic_outq_retransmit_mark(sk, packet->level, 0);
1338 1532 : quic_outq_update_loss_timer(sk);
1339 : }
1340 :
1341 3892 : if (!paths->validated) {
1342 : /* Increase anti-amplification credit if path isn't validated. */
1343 2859 : paths->ampl_rcvlen += cb->number_offset + cb->length;
1344 2859 : if (packet->level == QUIC_CRYPTO_HANDSHAKE) {
1345 : /* rfc9000#section-8.1:
1346 : *
1347 : * Once an endpoint has successfully processed a Handshake
1348 : * packet from the peer, it can consider the peer address to
1349 : * have been validated.
1350 : *
1351 : * (Handshake keys are ready, mark path validated and clean up
1352 : * transmitted initial packets).
1353 : */
1354 993 : paths->validated = 1;
1355 993 : quic_outq_transmitted_sack(sk, QUIC_CRYPTO_INITIAL,
1356 : QUIC_PN_MAP_MAX_PN, 0, -1, 0);
1357 : }
1358 : }
1359 :
1360 2899 : next:
1361 : /* Advance skb pointer to next QUIC packet. */
1362 3892 : skb_pull(skb, cb->number_offset + cb->length);
1363 :
1364 3892 : cb->resume = 0; /* Clear resume flag for next packet decryption. */
1365 3892 : skb_reset_transport_header(skb);
1366 3892 : if (!packet->ack_requested) /* If no ACK-eliciting frame, skip ACK generation. */
1367 935 : continue;
1368 :
1369 2957 : space->need_sack = 1; /* Mark that an ACK needs to be sent for this packet space. */
1370 :
1371 2957 : if (packet->level == QUIC_CRYPTO_INITIAL) {
1372 1337 : if (!is_serv) {
1373 : /* rfc9000#section-7.2
1374 : *
1375 : * After processing the first Initial packet, each endpoint sets the
1376 : * Destination Connection ID field in subsequent packets it sends to
1377 : * the value of the Source Connection ID field that it received.
1378 : *
1379 : * (Sever sets it when creating the accept socket in accpet()).
1380 : */
1381 623 : conn_id = quic_conn_id_active(quic_dest(sk));
1382 623 : quic_conn_id_update(conn_id, packet->scid.data, packet->scid.len);
1383 623 : continue;
1384 : }
1385 : /* rfc9000#section-14.1:
1386 : *
1387 : * A server MUST discard an Initial packet that is carried in a UDP
1388 : * datagram with a payload that is smaller than the smallest allowed
1389 : * maximum datagram size of 1200 bytes. A server MAY also immediately
1390 : * close the connection by sending a CONNECTION_CLOSE frame with an
1391 : * error code of PROTOCOL_VIOLATION.
1392 : */
1393 714 : uh = quic_udphdr(skb);
1394 714 : if (ntohs(uh->len) - sizeof(*uh) < QUIC_MIN_UDP_PAYLOAD) {
1395 1 : packet->errcode = QUIC_TRANSPORT_ERROR_PROTOCOL_VIOLATION;
1396 1 : err = -EINVAL;
1397 1 : goto err;
1398 : }
1399 : }
1400 : }
1401 3173 : if (inq->sack_flag == QUIC_SACK_FLAG_NONE) {
1402 : /* ACKs are not sent immediately, as they are typically bundled with other TLS
1403 : * messages from userspace. If userspace doesn't send anything, start the
1404 : * ack_delay timer to ensure ACKs are eventually transmitted.
1405 : */
1406 2399 : quic_timer_reset(sk, QUIC_TIMER_SACK, inq->max_ack_delay);
1407 2399 : inq->sack_flag = QUIC_SACK_FLAG_XMIT;
1408 : }
1409 3173 : if (paths->blocked) {
1410 : /* The path was previously blocked due to the anti-amplification limit. Now that
1411 : * additional credit may be available, unblock the path and update the loss timer
1412 : * to allow transmission of pending frames.
1413 : */
1414 34 : paths->blocked = 0;
1415 34 : quic_outq_update_loss_timer(sk);
1416 : }
1417 :
1418 3173 : consume_skb(skb);
1419 3173 : return 0;
1420 1 : err:
1421 1 : pr_debug("%s: failed, num: %llu, level: %d, err: %d\n",
1422 : __func__, cb->number, packet->level, err);
1423 : /* Transmit a CLOSE frame packet if errcode is set. */
1424 1 : quic_outq_transmit_close(sk, frame.type, packet->errcode, packet->level);
1425 1 : kfree_skb(skb);
1426 1 : return err;
1427 : }
1428 :
1429 : /* Process detected connection migration. Either initiate probing on a newly discovered
1430 : * alternate path or finalize migration if the new path is now active.
1431 : */
1432 6571224 : static void quic_packet_path_alt_process(struct sock *sk, struct sk_buff *skb)
1433 : {
1434 6571224 : struct quic_path_group *paths = quic_paths(sk);
1435 6571224 : struct quic_packet *packet = quic_packet(sk);
1436 6571224 : struct quic_skb_cb *cb = QUIC_SKB_CB(skb);
1437 :
1438 6571224 : if (cb->path) {
1439 : /* Start path probe for connection migration if an alternate path is detected
1440 : * and connection migration has not yet been initiated.
1441 : */
1442 259 : if (quic_path_alt_state(paths, QUIC_PATH_ALT_NONE))
1443 55 : quic_outq_probe_path_alt(sk, true);
1444 259 : return;
1445 : }
1446 :
1447 6570965 : if (!packet->non_probing || cb->number != cb->number_max ||
1448 6511411 : !quic_path_alt_state(paths, QUIC_PATH_ALT_SWAPPED))
1449 : return;
1450 :
1451 : /* Connection migration is complete: free old path resources if this is a non-probing,
1452 : * highest-numbered received packet after the new path was successfully swapped in as
1453 : * active.
1454 : */
1455 81 : quic_path_free(sk, paths, 1);
1456 81 : quic_conn_id_set_alt(quic_dest(sk), NULL);
1457 : /* Update the active source connection ID after connection migration. This ID is not used
1458 : * in 1-RTT packets but is tracked to detect changes in the destination connection ID of
1459 : * incoming packets. If the ID remains the same, it likely indicates a NAT rebinding rather
1460 : * than a true migration, and there's no need to use a new connection ID for the new path.
1461 : */
1462 81 : quic_conn_id_update_active(quic_source(sk), cb->seqno);
1463 : }
1464 :
1465 : /* Final processing steps for a 1-RTT QUIC packet. */
1466 6571221 : static int quic_packet_app_process_done(struct sock *sk, struct sk_buff *skb)
1467 : {
1468 6571221 : struct quic_pnspace *space = quic_pnspace(sk, QUIC_CRYPTO_APP);
1469 6571221 : struct quic_stream_table *streams = quic_streams(sk);
1470 6571221 : struct quic_path_group *paths = quic_paths(sk);
1471 6571221 : struct quic_packet *packet = quic_packet(sk);
1472 6571221 : struct quic_skb_cb *cb = QUIC_SKB_CB(skb);
1473 6571221 : struct quic_inqueue *inq = quic_inq(sk);
1474 6571221 : s64 max_bidi = 0, max_uni = 0;
1475 6571221 : u8 frame;
1476 :
1477 : /* rfc9000#section-13.4.1:
1478 : *
1479 : * On receiving an IP packet with an ECT(0), ECT(1), or ECN-CE codepoint, an
1480 : * ECN-enabled endpoint accesses the ECN field and increases the corresponding ECT(0),
1481 : * ECT(1), or ECN-CE count. These ECN counts are included in subsequent ACK frames.
1482 : */
1483 6571221 : quic_pnspace_inc_ecn_count(space, quic_get_msg_ecn(skb));
1484 :
1485 6571218 : quic_packet_path_alt_process(sk, skb); /* Process connection migration. */
1486 :
1487 6571212 : if (!paths->validated) /* Increase anti-amplification credit if path isn't validated. */
1488 0 : paths->ampl_rcvlen += skb->len;
1489 :
1490 6571212 : if (packet->has_sack) {
1491 : /* rfc9002#section-6:
1492 : *
1493 : * QUIC senders use acknowledgments to detect lost packets and a PTO to ensure
1494 : * acknowledgments are received.
1495 : */
1496 382608 : quic_outq_retransmit_mark(sk, 0, 0);
1497 382608 : quic_outq_update_loss_timer(sk);
1498 : }
1499 :
1500 6571212 : if (quic_stream_max_streams_update(streams, &max_uni, &max_bidi)) {
1501 : /* If stream limits changed, advertise updated stream credit to peer. */
1502 831 : if (max_uni) {
1503 142 : frame = QUIC_FRAME_MAX_STREAMS_UNI;
1504 142 : quic_outq_transmit_frame(sk, frame, &max_uni, 0, true);
1505 : }
1506 831 : if (max_bidi) {
1507 689 : frame = QUIC_FRAME_MAX_STREAMS_BIDI;
1508 689 : quic_outq_transmit_frame(sk, frame, &max_bidi, 0, true);
1509 : }
1510 : }
1511 :
1512 6571378 : if (!packet->ack_requested) /* If no ACK-eliciting frame, skip ACK generation. */
1513 58162 : goto out;
1514 :
1515 6513216 : if (!packet->ack_immediate) {
1516 : /* Start ack delay timer to generate ACK frames on 1-RTT level then transmit all
1517 : * pending ACKs.
1518 : */
1519 6344211 : if (inq->sack_flag == QUIC_SACK_FLAG_NONE)
1520 52153 : quic_timer_reset(sk, QUIC_TIMER_SACK, inq->max_ack_delay);
1521 6344211 : inq->sack_flag = QUIC_SACK_FLAG_APP;
1522 6344211 : goto out;
1523 : }
1524 169005 : space->need_sack = 1; /* Mark that an ACK needs to be sent for this packet space. */
1525 169005 : space->sack_path = cb->path; /* Send immediate ACK on the same path as received packet. */
1526 :
1527 6571378 : out:
1528 6571378 : if (quic_is_established(sk)) {
1529 : /* If connection is established, send ACKs immediately. Reuse ack_delay as an
1530 : * idle timer.
1531 : */
1532 6570086 : if (inq->sack_flag == QUIC_SACK_FLAG_NONE)
1533 220470 : quic_timer_reset(sk, QUIC_TIMER_IDLE, inq->timeout);
1534 6570086 : quic_outq_transmit(sk);
1535 1292 : } else if (inq->sack_flag == QUIC_SACK_FLAG_NONE) {
1536 : /* If not yet established, ACKs are not sent immediately, as they are typically
1537 : * bundled with other TLS messages from userspace. If userspace doesn't send
1538 : * anything, start the ack_delay timer to ensure ACKs are eventually transmitted.
1539 : */
1540 340 : inq->sack_flag = QUIC_SACK_FLAG_XMIT;
1541 340 : quic_timer_reset(sk, QUIC_TIMER_SACK, inq->max_ack_delay);
1542 : }
1543 6571828 : consume_skb(skb);
1544 6571375 : return 0;
1545 : }
1546 :
1547 : /* Process an incoming 1-RTT packet. */
1548 6568857 : static int quic_packet_app_process(struct sock *sk, struct sk_buff *skb)
1549 : {
1550 6568857 : struct quic_conn_id_set *dest = quic_dest(sk), *source = quic_source(sk);
1551 6568857 : struct quic_pnspace *space = quic_pnspace(sk, QUIC_CRYPTO_APP);
1552 6568857 : struct quic_crypto *crypto = quic_crypto(sk, QUIC_CRYPTO_APP);
1553 6568857 : struct quic_packet *packet = quic_packet(sk);
1554 6568857 : struct quic_skb_cb *cb = QUIC_SKB_CB(skb);
1555 6568857 : struct net *net = sock_net(sk);
1556 6568857 : struct quic_frame frame = {};
1557 6568857 : u8 taglen, key_phase;
1558 6568857 : int err = -EINVAL;
1559 :
1560 : /* Associate skb with sk to ensure sk is valid during async decryption completion. */
1561 6568857 : WARN_ON(!skb_set_owner_sk_safe(skb, sk));
1562 6571869 : sock_rps_save_rxhash(sk, skb);
1563 :
1564 6571869 : quic_packet_reset(packet); /* Reset packet state to prepare for new packet parsing. */
1565 6571869 : if (!quic_hdr(skb)->fixed && !quic_inq(sk)->grease_quic_bit) {
1566 : /* rfc9000#section-17.3.1:
1567 : *
1568 : * Packets containing a zero value for this bit are not valid packets in this
1569 : * version and MUST be discarded.
1570 : *
1571 : * rfc9287#section-3:
1572 : *
1573 : * An endpoint that advertises the grease_quic_bit transport parameter MUST accept
1574 : * packets with the QUIC Bit set to a value of 0.
1575 : */
1576 0 : QUIC_INC_STATS(net, QUIC_MIB_PKT_INVHDRDROP);
1577 0 : goto err;
1578 : }
1579 :
1580 6571869 : if (!crypto->recv_ready) { /* Queue to backlog until 1-RTT keys are ready. */
1581 482 : quic_inq_backlog_tail(sk, skb);
1582 482 : return 0;
1583 : }
1584 :
1585 6571387 : if (cb->seqno == -1) {
1586 : /* No valid matched connection ID was found, so treat this as a potential
1587 : * stateless reset packet.
1588 : */
1589 0 : if (!quic_packet_stateless_reset_process(sk, skb))
1590 : return 0;
1591 0 : goto err;
1592 : }
1593 : /* Calculate Payload Length. */
1594 6571387 : cb->number_offset = QUIC_CONN_ID_DEF_LEN + QUIC_HLEN;
1595 6571387 : cb->length = (u16)(skb->len - cb->number_offset);
1596 :
1597 : /* Set highest received packet number for packet number decode during decryption. */
1598 6571387 : cb->number_max = space->max_pn_seen;
1599 6571387 : cb->crypto_done = quic_packet_decrypt_done;
1600 :
1601 : /* draft-banks-quic-disable-encryption#section-2.1:
1602 : *
1603 : * Advertising the disable_1rtt_encryption transport parameter indicates that the
1604 : * endpoint wishes to disable encryption for 1-RTT packets. If successfully
1605 : * negotiated, all packets that would normally be encrypted with the 1-RTT key are
1606 : * instead sent as cleartext; both header and packet protections are disabled.
1607 : */
1608 6571387 : taglen = quic_packet_taglen(packet);
1609 6571387 : if (!taglen) /* Indicates disable_1rtt_encryption was negotiated. */
1610 1550657 : cb->resume = 1;
1611 6571387 : err = quic_crypto_decrypt(crypto, skb); /* Do packet decryption. */
1612 6570901 : if (err) {
1613 0 : if (err == -EINPROGRESS) {
1614 0 : QUIC_INC_STATS(net, QUIC_MIB_PKT_DECBACKLOGS);
1615 0 : return err;
1616 : }
1617 0 : QUIC_INC_STATS(net, QUIC_MIB_PKT_DECDROP);
1618 0 : if (cb->key_update) {
1619 : /* Notify application of the key update with new key phase even if the
1620 : * decryption failed, as the new key has been installed.
1621 : */
1622 0 : key_phase = cb->key_phase;
1623 0 : quic_inq_event_recv(sk, QUIC_EVENT_KEY_UPDATE, &key_phase);
1624 0 : goto err;
1625 : }
1626 : /* If this is not a result of a key update, propagate error to close connection. */
1627 0 : packet->errcode = cb->errcode;
1628 0 : goto err;
1629 : }
1630 6570901 : if (cb->key_update) { /* Notify application of the key update with new key phase. */
1631 19 : key_phase = cb->key_phase;
1632 19 : quic_inq_event_recv(sk, QUIC_EVENT_KEY_UPDATE, &key_phase);
1633 : }
1634 6570901 : if (!cb->resume) /* No decryption (e.g., via disable_1rtt_encryption or async complete). */
1635 5020441 : QUIC_INC_STATS(net, QUIC_MIB_PKT_DECFASTPATHS);
1636 6570834 : if (quic_hdr(skb)->reserved) {
1637 : /* rfc9000#section-17.2:
1638 : *
1639 : * An endpoint MUST treat receipt of a packet that has a non-zero value for
1640 : * these bits after removing both packet and header protection as a connection
1641 : * error of type PROTOCOL_VIOLATION.
1642 : */
1643 0 : QUIC_INC_STATS(net, QUIC_MIB_PKT_INVHDRDROP);
1644 0 : packet->errcode = QUIC_TRANSPORT_ERROR_PROTOCOL_VIOLATION;
1645 0 : goto err;
1646 : }
1647 :
1648 6570834 : pr_debug("%s: recvd, num: %llu, len: %d\n", __func__, cb->number, skb->len);
1649 :
1650 : /* Use packet arrival time as current time (may have been queued in backlog). */
1651 6570835 : space->time = cb->time;
1652 6570835 : quic_cong(sk)->time = cb->time;
1653 6570835 : err = quic_pnspace_check(space, cb->number);
1654 6570838 : if (err) {
1655 0 : if (err > 0) { /* Trigger an immediate ACK if packet number was already marked. */
1656 0 : packet->ack_requested = 1;
1657 0 : packet->ack_immediate = 1;
1658 0 : goto out;
1659 : }
1660 : /* Drop if packet number is outside ACK tracking range. */
1661 0 : QUIC_INC_STATS(net, QUIC_MIB_PKT_INVNUMDROP);
1662 0 : err = -EINVAL;
1663 0 : goto err;
1664 : }
1665 :
1666 : /* Read Destination address (packet->saddr) and Source address (packet->daddr). */
1667 6570838 : quic_get_msg_addrs(&packet->saddr, &packet->daddr, skb);
1668 : /* Detect alternate path if migration occurred. */
1669 6570097 : cb->path = quic_path_detect_alt(quic_paths(sk), &packet->saddr, &packet->daddr, sk);
1670 6567518 : if (cb->path && !quic_conn_id_select_alt(dest, cb->seqno == source->active->number)) {
1671 : /* Send RETIRE_CONNECTION_ID frame to request a new dest connection ID if no
1672 : * alternative one.
1673 : */
1674 0 : u64 seqno = quic_conn_id_first_number(dest);
1675 :
1676 0 : quic_outq_transmit_frame(sk, QUIC_FRAME_RETIRE_CONNECTION_ID, &seqno, 0, false);
1677 0 : goto err;
1678 : }
1679 :
1680 : /* Prepare a 'coalesced' frame for parsing and processing. */
1681 6567518 : frame.data = skb->data + cb->number_offset + cb->number_len;
1682 6567518 : frame.len = cb->length - cb->number_len - taglen;
1683 6567518 : frame.path = cb->path;
1684 6567518 : frame.skb = skb;
1685 6567518 : err = quic_frame_process(sk, &frame); /* Process this 'coalesced' frame. */
1686 6571310 : if (err) {
1687 48 : QUIC_INC_STATS(net, QUIC_MIB_PKT_INVFRMDROP);
1688 48 : goto err;
1689 : }
1690 : /* Mark packet number as received for ACK generation. */
1691 6571262 : err = quic_pnspace_mark(space, cb->number);
1692 6571221 : if (err)
1693 0 : goto err;
1694 :
1695 6571221 : out:
1696 6571221 : return quic_packet_app_process_done(sk, skb);
1697 :
1698 48 : err:
1699 48 : pr_debug("%s: failed, num: %llu, len: %d, err: %d\n",
1700 : __func__, cb->number, skb->len, err);
1701 : /* Transmit a CLOSE frame packet if errcode is set. */
1702 48 : quic_outq_transmit_close(sk, packet->errframe, packet->errcode, 0);
1703 48 : kfree_skb(skb);
1704 48 : return err;
1705 : }
1706 :
1707 6573835 : int quic_packet_process(struct sock *sk, struct sk_buff *skb)
1708 : {
1709 6573835 : if (quic_is_closed(sk)) {
1710 14 : kfree_skb(skb);
1711 14 : return 0;
1712 : }
1713 :
1714 6573821 : if (quic_is_listen(sk))
1715 938 : return quic_packet_listen_process(sk, skb);
1716 :
1717 6572883 : if (quic_hdr(skb)->form)
1718 3858 : return quic_packet_handshake_process(sk, skb);
1719 :
1720 6569025 : return quic_packet_app_process(sk, skb);
1721 : }
1722 :
/* The Packet Number and Length fields are always written with a fixed width (in bytes), so
 * quic_packet_config() can account for them before the header is built.
 */
#define QUIC_PACKET_NUMBER_LEN	4
#define QUIC_PACKET_LENGTH_LEN	4

/* Upper bound for paths->ecn_probes, i.e. how many tracked 1-RTT packets get marked ECT(0). */
#define QUIC_MAX_ECN_PROBES	3
1728 :
1729 6556957 : static u8 *quic_packet_pack_frames(struct sock *sk, struct sk_buff *skb,
1730 : struct quic_packet_sent *sent, u16 off)
1731 : {
1732 6556957 : struct quic_path_group *paths = quic_paths(sk);
1733 6556957 : struct quic_packet *packet = quic_packet(sk);
1734 6556957 : struct quic_outqueue *outq = quic_outq(sk);
1735 6556957 : struct quic_skb_cb *cb = QUIC_SKB_CB(skb);
1736 6556957 : u32 now = jiffies_to_usecs(jiffies);
1737 6556959 : struct quic_frame *frame, *next;
1738 6556959 : struct quic_frame_frag *frag;
1739 6556959 : struct quic_pnspace *space;
1740 6556959 : u8 *p = skb->data + off;
1741 6556959 : s64 number;
1742 6556959 : u16 i = 0;
1743 :
1744 6556959 : space = quic_pnspace(sk, packet->level);
1745 6556959 : number = space->next_pn++;
1746 :
1747 : /* Store packet metadata in skb CB for later use (e.g., encryption). */
1748 6556959 : cb->number_len = QUIC_PACKET_NUMBER_LEN;
1749 6556959 : cb->number_offset = off;
1750 6556959 : cb->number = number;
1751 6556959 : cb->level = packet->level;
1752 6556959 : cb->path = packet->path;
1753 :
1754 6556959 : p = quic_put_int(p, number, cb->number_len); /* Write packet number. */
1755 :
1756 13463650 : list_for_each_entry_safe(frame, next, &packet->frame_list, list) {
1757 6904924 : list_del(&frame->list);
1758 : /* Write main frame data and appended fragments. */
1759 6902423 : p = quic_put_data(p, frame->data, frame->size);
1760 15206223 : for (frag = frame->flist; frag; frag = frag->next)
1761 8302120 : p = quic_put_data(p, frag->data, frag->size);
1762 6904103 : pr_debug("%s: num: %llu, type: %u, packet_len: %u, frame_len: %u, level: %u\n",
1763 : __func__, number, frame->type, skb->len, frame->len, packet->level);
1764 6905599 : if (!frame->ack_eliciting || quic_frame_ping(frame->type)) {
1765 : /* Skip non-ACK-eliciting or ping frames for tracking. */
1766 381407 : quic_frame_put(frame);
1767 381424 : continue;
1768 : }
1769 6524192 : if (frame->offset < 0) {
1770 : /* First time sending: record offset and adjust unsent byte count. */
1771 6510667 : frame->offset = number;
1772 6510667 : outq->unsent_bytes -= frame->bytes;
1773 : }
1774 6524192 : quic_outq_transmitted_tail(sk, frame); /* Move frame to transmitted queue. */
1775 : /* Hold frame in sent packet record. */
1776 6523558 : sent->frame_array[i++] = quic_frame_get(frame);
1777 : }
1778 :
1779 : /* Track bytes sent before address validation to respect amplification limits for server. */
1780 6558726 : if (quic_is_serv(sk) && !paths->validated)
1781 1808 : paths->ampl_sndlen += skb->len + quic_packet_taglen(packet);
1782 :
1783 : /* Reset path validation timer if handshake is done and we're not currently probing an
1784 : * alternate path. After handshake, the timer may trigger PATH_CHALLENGE frames for
1785 : * continued path validation, which should be suppressed if we've just sent ACK-eliciting
1786 : * data to avoid unnecessary probes.
1787 : */
1788 6558726 : if (quic_is_established(sk) && !quic_path_alt_state(paths, QUIC_PATH_ALT_PROBING))
1789 6552551 : quic_timer_reset_path(sk);
1790 :
1791 : /* Update the last sent timestamp if this packet is ACK-eliciting. This is important for
1792 : * loss detection and PTO (Probe Timeout) logic.
1793 : */
1794 6558540 : if (packet->ack_eliciting)
1795 6484820 : space->last_sent_time = now;
1796 :
1797 6558540 : if (!sent) /* If the packet doesn't need tracking for ACK or loss detection, we're done. */
1798 : return p;
1799 :
1800 : /* rfc9000#section-13.4.2:
1801 : *
1802 : * To perform ECN validation for a new path:
1803 : *
1804 : * The endpoint sets an ECT(0) codepoint in the IP header of early outgoing packets sent
1805 : * on a new path to the peer.
1806 : */
1807 6484618 : if (!packet->level && paths->ecn_probes < QUIC_MAX_ECN_PROBES) {
1808 2744 : paths->ecn_probes++;
1809 2744 : cb->ecn = INET_ECN_ECT_0;
1810 2744 : sent->ecn = INET_ECN_ECT_0;
1811 : }
1812 : /* Fill metadata for this sent packet.
1813 : * Convert CRYPTO level to PN space level since 0-RTT and 1-RTT share PN space.
1814 : */
1815 6484618 : sent->number = number;
1816 6484618 : sent->sent_time = now;
1817 6484618 : sent->frame_len = packet->frame_len;
1818 6484618 : sent->level = (packet->level % QUIC_CRYPTO_EARLY);
1819 :
1820 6484618 : space->inflight += sent->frame_len;
1821 6484618 : outq->inflight += sent->frame_len;
1822 : /* Append packet to sent list for loss and ACK tracking. */
1823 6484618 : quic_outq_packet_sent_tail(sk, sent);
1824 :
1825 : /* Call cong.on_packet_sent() where it does pacing time update. */
1826 6484421 : quic_cong_on_packet_sent(quic_cong(sk), sent->sent_time, sent->frame_len, number);
1827 : /* Refresh loss detection timer after sending data. */
1828 6484612 : quic_outq_update_loss_timer(sk);
1829 6484612 : return p;
1830 : }
1831 :
1832 6479843 : static struct quic_packet_sent *quic_packet_sent_alloc(u16 frames)
1833 : {
1834 6479843 : u32 len = frames * sizeof(struct quic_frame *);
1835 6479843 : struct quic_packet_sent *sent;
1836 :
1837 6479843 : sent = kzalloc(sizeof(*sent) + len, GFP_ATOMIC);
1838 6480584 : if (sent)
1839 6480584 : sent->frames = frames;
1840 :
1841 6480584 : return sent;
1842 : }
1843 :
1844 : /* rfc9000#section-17.2.2:
1845 : *
1846 : * Initial Packet {
1847 : * Header Form (1) = 1,
1848 : * Fixed Bit (1) = 1,
1849 : * Long Packet Type (2) = 0,
1850 : * Reserved Bits (2),
1851 : * Packet Number Length (2),
1852 : * Version (32),
1853 : * Destination Connection ID Length (8),
1854 : * Destination Connection ID (0..160),
1855 : * Source Connection ID Length (8),
1856 : * Source Connection ID (0..160),
1857 : * Token Length (i),
1858 : * Token (..),
1859 : * Length (i),
1860 : * Packet Number (8..32),
1861 : * Packet Payload (8..),
1862 : * }
1863 : *
1864 : * An Initial packet uses long headers with a type value of 0x00. It carries the first CRYPTO
1865 : * frames sent by the client and server to perform key exchange, and it carries ACK frames in
1866 : * either direction.
1867 : *
1868 : * rfc9000#section-17.2.4:
1869 : *
1870 : * Handshake Packet {
1871 : * Header Form (1) = 1,
1872 : * Fixed Bit (1) = 1,
1873 : * Long Packet Type (2) = 2,
1874 : * Reserved Bits (2),
1875 : * Packet Number Length (2),
1876 : * Version (32),
1877 : * Destination Connection ID Length (8),
1878 : * Destination Connection ID (0..160),
1879 : * Source Connection ID Length (8),
1880 : * Source Connection ID (0..160),
1881 : * Length (i),
1882 : * Packet Number (8..32),
1883 : * Packet Payload (8..),
1884 : * }
1885 : *
1886 : * A Handshake packet uses long headers with a type value of 0x02, followed by the Length and
1887 : * Packet Number fields. The first byte contains the Reserved and Packet Number Length bits. It is
1888 : * used to carry cryptographic handshake messages and acknowledgments from the server and client.
1889 : *
1890 : * rfc9000#section-17.2.3:
1891 : *
1892 : * 0-RTT Packet {
1893 : * Header Form (1) = 1,
1894 : * Fixed Bit (1) = 1,
1895 : * Long Packet Type (2) = 1,
1896 : * Reserved Bits (2),
1897 : * Packet Number Length (2),
1898 : * Version (32),
1899 : * Destination Connection ID Length (8),
1900 : * Destination Connection ID (0..160),
1901 : * Source Connection ID Length (8),
1902 : * Source Connection ID (0..160),
1903 : * Length (i),
1904 : * Packet Number (8..32),
1905 : * Packet Payload (8..),
1906 : * }
1907 : *
1908 : * A 0-RTT packet uses long headers with a type value of 0x01, followed by the Length and Packet
1909 : * Number fields. The first byte contains the Reserved and Packet Number Length bits. A 0-RTT
1910 : * packet is used to carry "early" data from the client to the server as part of the first flight,
1911 : * prior to handshake completion.
1912 : */
1913 5583 : static struct sk_buff *quic_packet_handshake_create(struct sock *sk)
1914 : {
1915 5583 : struct quic_conn_id_set *dest = quic_dest(sk), *source = quic_source(sk);
1916 5583 : struct quic_packet *packet = quic_packet(sk);
1917 5583 : u8 type, fixed = 1, level = packet->level;
1918 5583 : struct quic_packet_sent *sent = NULL;
1919 5583 : struct quic_conn_id *active;
1920 5583 : u32 len, hlen, plen = 0;
1921 5583 : struct quichshdr *hdr;
1922 5583 : struct sk_buff *skb;
1923 5583 : u16 off;
1924 5583 : u8 *p;
1925 :
1926 : /* Determine packet type based on encryption level. */
1927 5583 : type = QUIC_PACKET_INITIAL;
1928 5583 : if (level == QUIC_CRYPTO_HANDSHAKE) {
1929 2901 : type = QUIC_PACKET_HANDSHAKE;
1930 2901 : fixed = !quic_outq(sk)->grease_quic_bit;
1931 2682 : } else if (level == QUIC_CRYPTO_EARLY) {
1932 38 : type = QUIC_PACKET_0RTT;
1933 : }
1934 :
1935 5583 : len = packet->len;
1936 5583 : if (packet->ack_eliciting) {
1937 : /* rfc9000#section-14.1:
1938 : *
1939 : * A client MUST expand the payload of all UDP datagrams carrying Initial packets
1940 : * to at least the smallest allowed maximum datagram size of 1200 bytes by adding
1941 : * PADDING frames to the Initial packet or by coalescing the Initial packet.
1942 : * Similarly, a server MUST expand the payload of all UDP datagrams carrying
1943 : * ack-eliciting Initial packets to at least the smallest allowed maximum datagram
1944 : * size of 1200 bytes.
1945 : */
1946 3180 : hlen = QUIC_MIN_UDP_PAYLOAD - packet->taglen[1];
1947 3180 : if (level == QUIC_CRYPTO_INITIAL && len < hlen) {
1948 1480 : len = hlen;
1949 1480 : plen = len - packet->len;
1950 : }
1951 : }
1952 5583 : if (packet->frames) {
1953 : /* If there are ack-eliciting frames (not including PING), create packet_sent
1954 : * for acknownledge and loss detection.
1955 : */
1956 3031 : sent = quic_packet_sent_alloc(packet->frames);
1957 3031 : if (!sent) { /* Move pending frames back to the outqueue. */
1958 0 : quic_outq_retransmit_list(sk, &packet->frame_list);
1959 0 : return NULL;
1960 : }
1961 : }
1962 :
1963 : /* Allocate skb with space for header + payload + AEAD taglen of Long Packet. */
1964 5583 : hlen = packet->hlen + MAX_HEADER;
1965 5583 : skb = alloc_skb(hlen + len + packet->taglen[1], GFP_ATOMIC);
1966 5583 : if (!skb) {
1967 0 : kfree(sent);
1968 0 : quic_outq_retransmit_list(sk, &packet->frame_list);
1969 0 : return NULL;
1970 : }
1971 5583 : skb->ignore_df = packet->ipfragok;
1972 5583 : skb_reserve(skb, (int)(hlen + len));
1973 :
1974 : /* Build Long Packet header. */
1975 5583 : hdr = skb_push(skb, len);
1976 5583 : hdr->form = 1;
1977 5583 : hdr->fixed = fixed;
1978 5583 : hdr->type = quic_packet_version_put_type(packet->version, type);
1979 5583 : hdr->reserved = 0;
1980 5583 : hdr->pnl = QUIC_PACKET_NUMBER_LEN - 1;
1981 5583 : skb_reset_transport_header(skb);
1982 :
1983 : /* Write the QUIC version. */
1984 5583 : p = (u8 *)hdr + QUIC_HLEN;
1985 5583 : p = quic_put_int(p, packet->version, QUIC_VERSION_LEN);
1986 :
1987 : /* Write Destination Connection ID. */
1988 5583 : active = quic_conn_id_active(dest);
1989 5583 : p = quic_put_int(p, active->len, 1);
1990 5583 : p = quic_put_data(p, active->data, active->len);
1991 :
1992 : /* Write Source Connection ID. */
1993 5583 : active = quic_conn_id_active(source);
1994 5583 : p = quic_put_int(p, active->len, 1);
1995 5583 : p = quic_put_data(p, active->data, active->len);
1996 :
1997 : /* Write Token if needed. */
1998 5583 : if (level == QUIC_CRYPTO_INITIAL) { /* Only Initial packet carries tokens. */
1999 2644 : hlen = 0;
2000 2644 : if (!quic_is_serv(sk)) /* Only clients send tokens. */
2001 1548 : hlen = quic_token(sk)->len;
2002 2644 : p = quic_put_var(p, hlen);
2003 2644 : p = quic_put_data(p, quic_token(sk)->data, hlen);
2004 : }
2005 :
2006 : /* Write Length. */
2007 5583 : off = (u16)(p + QUIC_PACKET_LENGTH_LEN - skb->data);
2008 5583 : p = quic_put_varint(p, len - off + QUIC_TAG_LEN, QUIC_PACKET_LENGTH_LEN);
2009 :
2010 : /* Pack Packet Number and actual frames starting at offset 'off'. */
2011 5583 : p = quic_packet_pack_frames(sk, skb, sent, off);
2012 5583 : if (plen) /* Set padding to zero. */
2013 2960 : memset(p, 0, plen);
2014 : return skb;
2015 : }
2016 :
2017 : /* Ensures the packet number is within the valid range. */
2018 6553766 : static int quic_packet_number_check(struct sock *sk)
2019 : {
2020 6553766 : struct quic_packet *packet = quic_packet(sk);
2021 6553766 : struct quic_pnspace *space;
2022 :
2023 : /* Check if the next packet number is within the allowed range. */
2024 6553766 : space = quic_pnspace(sk, packet->level);
2025 6553766 : if (space->next_pn + 1 <= QUIC_PN_MAP_MAX_PN)
2026 : return 0;
2027 :
2028 : /* Move pending frames back to the outqueue. */
2029 0 : quic_outq_retransmit_list(sk, &packet->frame_list);
2030 :
2031 : /* rfc9000#section-12.3:
2032 : *
2033 : * If the packet number for sending reaches 262-1, the sender MUST close the
2034 : * connection without sending a CONNECTION_CLOSE frame or any further packets.
2035 : */
2036 0 : if (!quic_is_closed(sk)) {
2037 0 : struct quic_connection_close close = {};
2038 :
2039 : /* Notify application that the connection is being closed. */
2040 0 : quic_inq_event_recv(sk, QUIC_EVENT_CONNECTION_CLOSE, &close);
2041 0 : quic_set_state(sk, QUIC_SS_CLOSED);
2042 : }
2043 : return -EPIPE;
2044 : }
2045 :
2046 : /* rfc9000#section-17.3.1:
2047 : *
2048 : * 1-RTT Packet {
2049 : * Header Form (1) = 0,
2050 : * Fixed Bit (1) = 1,
2051 : * Spin Bit (1),
2052 : * Reserved Bits (2),
2053 : * Key Phase (1),
2054 : * Packet Number Length (2),
2055 : * Destination Connection ID (0..160),
2056 : * Packet Number (8..32),
2057 : * Packet Payload (8..),
2058 : * }
2059 : *
2060 : * A 1-RTT packet uses a short packet header. It is used after the version and 1-RTT keys are
2061 : * negotiated.
2062 : */
2063 6548160 : static struct sk_buff *quic_packet_app_create(struct sock *sk)
2064 : {
2065 6548160 : struct quic_conn_id_set *id_set = quic_dest(sk);
2066 6548160 : struct quic_packet *packet = quic_packet(sk);
2067 6548160 : struct quic_packet_sent *sent = NULL;
2068 6548160 : struct quic_conn_id *active;
2069 6548160 : struct sk_buff *skb;
2070 6548160 : struct quichdr *hdr;
2071 6548160 : u32 len, hlen;
2072 6548160 : u16 off;
2073 :
2074 6548160 : if (packet->frames) {
2075 : /* If there are ack-eliciting frames (not including PING), create packet_sent
2076 : * for acknownledge and loss detection.
2077 : */
2078 6476819 : sent = quic_packet_sent_alloc(packet->frames);
2079 6477590 : if (!sent) { /* Move pending frames back to the outqueue. */
2080 0 : quic_outq_retransmit_list(sk, &packet->frame_list);
2081 0 : return NULL;
2082 : }
2083 : }
2084 :
2085 : /* Allocate skb with space for header + payload + AEAD taglen of Short Packet. */
2086 6548931 : len = packet->len;
2087 6548931 : hlen = packet->hlen + MAX_HEADER;
2088 6548931 : skb = alloc_skb(hlen + len + packet->taglen[0], GFP_ATOMIC);
2089 6551579 : if (!skb) { /* Move pending frames back to the outqueue. */
2090 0 : kfree(sent);
2091 0 : quic_outq_retransmit_list(sk, &packet->frame_list);
2092 0 : return NULL;
2093 : }
2094 6551579 : skb->ignore_df = packet->ipfragok;
2095 6551579 : skb_reserve(skb, (int)(hlen + len));
2096 :
2097 : /* Build Short Packet header. */
2098 6551579 : hdr = skb_push(skb, len);
2099 6551385 : hdr->form = 0;
2100 6551385 : hdr->fixed = !quic_outq(sk)->grease_quic_bit;
2101 6551385 : hdr->spin = 0;
2102 6551385 : hdr->reserved = 0;
2103 6551385 : hdr->pnl = QUIC_PACKET_NUMBER_LEN - 1;
2104 6551385 : skb_reset_transport_header(skb);
2105 :
2106 : /* Choose the active destination connection ID based on path. */
2107 6551385 : active = quic_conn_id_choose(id_set, packet->path);
2108 6551385 : quic_put_data((u8 *)hdr + QUIC_HLEN, active->data, active->len);
2109 6551383 : off = (u16)(active->len + sizeof(struct quichdr));
2110 :
2111 : /* Pack Packet Number and actual frames starting at offset 'off'. */
2112 6551383 : quic_packet_pack_frames(sk, skb, sent, off);
2113 6551383 : return skb;
2114 : }
2115 :
2116 : /* Update the MSS and inform congestion control. */
2117 2916 : void quic_packet_mss_update(struct sock *sk, u32 mss)
2118 : {
2119 2916 : struct quic_packet *packet = quic_packet(sk);
2120 2916 : struct quic_outqueue *outq = quic_outq(sk);
2121 2916 : struct quic_cong *cong = quic_cong(sk);
2122 :
2123 : /* Limit MSS for regular QUIC packets to the max UDP payload size. */
2124 2916 : if (outq->max_udp_payload_size && mss > outq->max_udp_payload_size)
2125 : mss = outq->max_udp_payload_size;
2126 2916 : packet->mss[0] = (u16)mss;
2127 :
2128 : /* Update congestion control with new payload space (excluding tag). */
2129 2916 : quic_cong_set_mss(cong, packet->mss[0] - packet->taglen[0]);
2130 2916 : quic_outq_sync_window(sk, cong->window);
2131 :
2132 : /* Limit MSS for DATAGRAM frame packets to the max datagram frame size. */
2133 2916 : if (outq->max_datagram_frame_size && mss > outq->max_datagram_frame_size)
2134 : mss = outq->max_datagram_frame_size;
2135 2916 : packet->mss[1] = (u16)mss;
2136 2916 : }
2137 :
2138 : /* Perform routing for the QUIC packet on the specified path, update header length and MSS
2139 : * accordingly, reset path and start PMTU timer.
2140 : */
2141 11013733 : int quic_packet_route(struct sock *sk)
2142 : {
2143 11013733 : struct quic_path_group *paths = quic_paths(sk);
2144 11013733 : struct quic_packet *packet = quic_packet(sk);
2145 11013733 : struct quic_config *c = quic_config(sk);
2146 11013733 : union quic_addr *sa, *da;
2147 11013733 : u32 pmtu;
2148 11013733 : int err;
2149 :
2150 11013733 : da = quic_path_daddr(paths, packet->path);
2151 11013733 : sa = quic_path_saddr(paths, packet->path);
2152 11013733 : err = quic_flow_route(sk, da, sa, &paths->fl);
2153 11014880 : if (err)
2154 : return err;
2155 :
2156 1911 : packet->hlen = quic_encap_len(da);
2157 1911 : pmtu = min_t(u32, dst_mtu(__sk_dst_get(sk)), QUIC_PATH_MAX_PMTU);
2158 1911 : quic_packet_mss_update(sk, pmtu - packet->hlen);
2159 :
2160 1911 : quic_path_pl_reset(paths);
2161 1911 : quic_timer_reset(sk, QUIC_TIMER_PMTU, c->plpmtud_probe_interval);
2162 1911 : return 0;
2163 : }
2164 :
2165 : /* Configure the QUIC packet header and routing based on encryption level and path. */
2166 13454758 : int quic_packet_config(struct sock *sk, u8 level, u8 path)
2167 : {
2168 13454758 : struct quic_conn_id_set *dest = quic_dest(sk), *source = quic_source(sk);
2169 13454758 : struct quic_packet *packet = quic_packet(sk);
2170 13454758 : struct quic_config *c = quic_config(sk);
2171 13454758 : u32 hlen = QUIC_HLEN;
2172 :
2173 : /* If packet already has data, no need to reconfigure. */
2174 13454758 : if (!quic_packet_empty(packet))
2175 : return 0;
2176 :
2177 11011718 : packet->ack_eliciting = 0;
2178 11011718 : packet->frame_len = 0;
2179 11011718 : packet->ipfragok = 0;
2180 11011718 : packet->padding = 0;
2181 11011718 : packet->frames = 0;
2182 11011718 : hlen += QUIC_PACKET_NUMBER_LEN; /* Packet number length. */
2183 11011718 : hlen += quic_conn_id_choose(dest, path)->len; /* DCID length. */
2184 11011718 : if (level) {
2185 8839 : hlen += 1; /* Length byte for DCID. */
2186 8839 : hlen += 1 + quic_conn_id_active(source)->len; /* Length byte + SCID length. */
2187 8839 : if (level == QUIC_CRYPTO_INITIAL) /* Include token for Initial packets. */
2188 3673 : hlen += quic_var_len(quic_token(sk)->len) + quic_token(sk)->len;
2189 8839 : hlen += QUIC_VERSION_LEN; /* Version length. */
2190 8839 : hlen += QUIC_PACKET_LENGTH_LEN; /* Packet length field length. */
2191 : /* Allow fragmentation if PLPMTUD is enabled, as it no longer relies on ICMP
2192 : * Toobig messages to discover the path MTU.
2193 : */
2194 8839 : packet->ipfragok = !!c->plpmtud_probe_interval;
2195 : }
2196 11011718 : packet->level = level;
2197 11011718 : packet->len = (u16)hlen;
2198 11011718 : packet->overhead = (u8)hlen;
2199 :
2200 11011718 : if (packet->path != path) { /* If the path changed, update and reset routing cache. */
2201 274 : packet->path = path;
2202 274 : __sk_dst_reset(sk);
2203 : }
2204 :
2205 : /* Perform routing and MSS update for the configured packet. */
2206 11011718 : if (quic_packet_route(sk) < 0)
2207 0 : return -1;
2208 : return 0;
2209 : }
2210 :
2211 0 : static void quic_packet_encrypt_done(struct sk_buff *skb, int err)
2212 : {
2213 0 : if (err) {
2214 0 : QUIC_INC_STATS(sock_net(skb->sk), QUIC_MIB_PKT_ENCDROP);
2215 0 : kfree_skb(skb);
2216 0 : pr_debug("%s: err: %d\n", __func__, err);
2217 0 : return;
2218 : }
2219 :
2220 : /* Encryption succeeded: queue the encrypted skb for asynchronous transmission. */
2221 0 : quic_outq_encrypted_tail(skb->sk, skb);
2222 : }
2223 :
2224 : /* Coalescing Packets. */
2225 6558712 : static int quic_packet_bundle(struct sock *sk, struct sk_buff *skb)
2226 : {
2227 6558712 : struct quic_skb_cb *head_cb, *cb = QUIC_SKB_CB(skb);
2228 6558712 : struct quic_packet *packet = quic_packet(sk);
2229 6558712 : struct sk_buff *p;
2230 :
2231 6558712 : if (!packet->head) { /* First packet to bundle: initialize the head. */
2232 6556499 : packet->head = skb;
2233 6556499 : cb->last = skb;
2234 6556499 : goto out;
2235 : }
2236 :
2237 : /* If bundling would exceed MSS, flush the current bundle. */
2238 2213 : if (packet->head->len + skb->len >= packet->mss[0]) {
2239 421 : quic_packet_flush(sk);
2240 421 : packet->head = skb;
2241 421 : cb->last = skb;
2242 421 : goto out;
2243 : }
2244 : /* Bundle it and update metadata for the aggregate skb. */
2245 1792 : p = packet->head;
2246 1792 : head_cb = QUIC_SKB_CB(p);
2247 1792 : if (head_cb->last == p)
2248 1198 : skb_shinfo(p)->frag_list = skb;
2249 : else
2250 594 : head_cb->last->next = skb;
2251 1792 : p->data_len += skb->len;
2252 1792 : p->truesize += skb->truesize;
2253 1792 : p->len += skb->len;
2254 1792 : head_cb->last = skb;
2255 1792 : head_cb->ecn |= cb->ecn; /* Merge ECN flags. */
2256 :
2257 6558712 : out:
2258 : /* rfc9000#section-12.2:
2259 : * Packets with a short header (Section 17.3) do not contain a Length field and so
2260 : * cannot be followed by other packets in the same UDP datagram.
2261 : *
2262 : * so Return 1 to flush if it is a Short header packet.
2263 : */
2264 6558712 : return !cb->level;
2265 : }
2266 :
2267 : /* Transmit a QUIC packet, possibly encrypting and bundling it. */
2268 6556128 : int quic_packet_xmit(struct sock *sk, struct sk_buff *skb)
2269 : {
2270 6556128 : struct quic_packet *packet = quic_packet(sk);
2271 6556128 : struct quic_skb_cb *cb = QUIC_SKB_CB(skb);
2272 6556128 : struct net *net = sock_net(sk);
2273 6556128 : int err;
2274 :
2275 : /* Associate skb with sk to ensure sk is valid during async encryption completion. */
2276 6556128 : WARN_ON(!skb_set_owner_sk_safe(skb, sk));
2277 :
2278 : /* Skip encryption if taglen == 0 (e.g., disable_1rtt_encryption). */
2279 6558768 : if (!packet->taglen[quic_hdr(skb)->form])
2280 1550674 : goto xmit;
2281 :
2282 5008094 : cb->crypto_done = quic_packet_encrypt_done;
2283 5008094 : err = quic_crypto_encrypt(quic_crypto(sk, packet->level), skb);
2284 5008036 : if (err) {
2285 0 : if (err != -EINPROGRESS) {
2286 0 : QUIC_INC_STATS(net, QUIC_MIB_PKT_ENCDROP);
2287 0 : kfree_skb(skb);
2288 0 : return err;
2289 : }
2290 0 : QUIC_INC_STATS(net, QUIC_MIB_PKT_ENCBACKLOGS);
2291 0 : return err;
2292 : }
2293 5008036 : if (!cb->resume) /* Encryption completes synchronously. */
2294 5008036 : QUIC_INC_STATS(net, QUIC_MIB_PKT_ENCFASTPATHS);
2295 :
2296 0 : xmit:
2297 6558709 : if (quic_packet_bundle(sk, skb))
2298 6553142 : quic_packet_flush(sk);
2299 : return 0;
2300 : }
2301 :
2302 : /* Create and transmit a new QUIC packet. */
2303 6553761 : int quic_packet_create(struct sock *sk)
2304 : {
2305 6553761 : struct quic_packet *packet = quic_packet(sk);
2306 6553761 : struct sk_buff *skb;
2307 6553761 : int err;
2308 :
2309 6553761 : err = quic_packet_number_check(sk);
2310 6553799 : if (err)
2311 0 : goto err;
2312 :
2313 6553799 : if (packet->level)
2314 5583 : skb = quic_packet_handshake_create(sk);
2315 : else
2316 6548216 : skb = quic_packet_app_create(sk);
2317 6556137 : if (!skb) {
2318 0 : err = -ENOMEM;
2319 0 : goto err;
2320 : }
2321 :
2322 6556137 : err = quic_packet_xmit(sk, skb);
2323 6558713 : if (err && err != -EINPROGRESS)
2324 0 : goto err;
2325 :
2326 : /* Return 1 if at least one ACK-eliciting (non-PING) frame was sent. */
2327 6558713 : return !!packet->frames;
2328 0 : err:
2329 0 : pr_debug("%s: err: %d\n", __func__, err);
2330 : return 0;
2331 : }
2332 :
2333 : /* Flush any coalesced/bundled QUIC packets. */
2334 19731655 : void quic_packet_flush(struct sock *sk)
2335 : {
2336 19731655 : struct quic_path_group *paths = quic_paths(sk);
2337 19731655 : struct quic_packet *packet = quic_packet(sk);
2338 :
2339 19731655 : if (packet->head) {
2340 6556929 : quic_lower_xmit(sk, packet->head,
2341 6556929 : quic_path_daddr(paths, packet->path), &paths->fl);
2342 6556916 : packet->head = NULL;
2343 : }
2344 19731642 : }
2345 :
2346 : /* Append a frame to the tail of the current QUIC packet. */
2347 8944145 : int quic_packet_tail(struct sock *sk, struct quic_frame *frame)
2348 : {
2349 8944145 : struct quic_packet *packet = quic_packet(sk);
2350 8944145 : u8 taglen;
2351 :
2352 : /* Reject frame if it doesn't match the packet's encryption level or path, or if
2353 : * padding is already in place (no further frames should be added).
2354 : */
2355 8944145 : if (frame->level != (packet->level % QUIC_CRYPTO_EARLY) ||
2356 8941873 : frame->path != packet->path || packet->padding)
2357 : return 0;
2358 :
2359 : /* Check if frame would exceed the current datagram MSS (excluding AEAD tag). */
2360 8943802 : taglen = quic_packet_taglen(packet);
2361 8943802 : if (packet->len + frame->len > packet->mss[frame->dgram] - taglen) {
2362 : /* If some data has already been added to the packet, bail out. */
2363 2047885 : if (packet->len != packet->overhead)
2364 : return 0;
2365 : /* Otherwise, allow IP fragmentation for this packet unless it’s a PING probe. */
2366 2047 : if (!quic_frame_ping(frame->type))
2367 2046 : packet->ipfragok = 1;
2368 : }
2369 6897964 : if (frame->padding)
2370 193 : packet->padding = frame->padding;
2371 :
2372 : /* Track frames that require retransmission if lost (i.e., ACK-eliciting and non-PING). */
2373 6897964 : if (frame->ack_eliciting) {
2374 6518708 : packet->ack_eliciting = 1;
2375 6518708 : if (!quic_frame_ping(frame->type)) {
2376 6518507 : packet->frames++;
2377 6518507 : packet->frame_len += frame->len;
2378 : }
2379 : }
2380 :
2381 6897964 : list_move_tail(&frame->list, &packet->frame_list);
2382 6897893 : packet->len += frame->len;
2383 6897893 : return frame->len;
2384 : }
2385 :
2386 1119 : void quic_packet_init(struct sock *sk)
2387 : {
2388 1119 : struct quic_packet *packet = quic_packet(sk);
2389 :
2390 1119 : INIT_LIST_HEAD(&packet->frame_list);
2391 1119 : packet->taglen[0] = QUIC_TAG_LEN;
2392 1119 : packet->taglen[1] = QUIC_TAG_LEN;
2393 1119 : packet->mss[0] = QUIC_TAG_LEN;
2394 1119 : packet->mss[1] = QUIC_TAG_LEN;
2395 :
2396 1119 : packet->version = QUIC_VERSION_V1;
2397 1119 : }
|