1 | /* $NetBSD: ipsec_mbuf.c,v 1.12 2011/05/16 10:05:23 drochner Exp $ */ |
2 | /*- |
3 | * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting |
4 | * All rights reserved. |
5 | * |
6 | * Redistribution and use in source and binary forms, with or without |
7 | * modification, are permitted provided that the following conditions |
8 | * are met: |
9 | * 1. Redistributions of source code must retain the above copyright |
10 | * notice, this list of conditions and the following disclaimer. |
11 | * 2. Redistributions in binary form must reproduce the above copyright |
12 | * notice, this list of conditions and the following disclaimer in the |
13 | * documentation and/or other materials provided with the distribution. |
14 | * |
15 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
16 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
21 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
22 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
23 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
24 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
25 | * SUCH DAMAGE. |
26 | * |
27 | * $FreeBSD: /repoman/r/ncvs/src/sys/netipsec/ipsec_mbuf.c,v 1.5.2.2 2003/03/28 20:32:53 sam Exp $ |
28 | */ |
29 | |
30 | #include <sys/cdefs.h> |
31 | __KERNEL_RCSID(0, "$NetBSD: ipsec_mbuf.c,v 1.12 2011/05/16 10:05:23 drochner Exp $" ); |
32 | |
33 | /* |
34 | * IPsec-specific mbuf routines. |
35 | */ |
36 | |
37 | #ifdef __FreeBSD__ |
38 | #include "opt_param.h" |
39 | #endif |
40 | |
41 | #include <sys/param.h> |
42 | #include <sys/systm.h> |
43 | #include <sys/mbuf.h> |
44 | #include <sys/socket.h> |
45 | |
46 | #include <net/route.h> |
47 | #include <netinet/in.h> |
48 | |
49 | #include <netipsec/ipsec.h> |
50 | #include <netipsec/ipsec_var.h> |
51 | #include <netipsec/ipsec_private.h> |
52 | |
53 | #include <netipsec/ipsec_osdep.h> |
54 | #include <net/net_osdep.h> |
55 | |
56 | /* |
57 | * Create a writable copy of the mbuf chain. While doing this |
58 | * we compact the chain with a goal of producing a chain with |
59 | * at most two mbufs. The second mbuf in this chain is likely |
60 | * to be a cluster. The primary purpose of this work is to create |
61 | * a writable packet for encryption, compression, etc. The |
62 | * secondary goal is to linearize the data so the data can be |
63 | * passed to crypto hardware in the most efficient manner possible. |
64 | */ |
65 | struct mbuf * |
66 | m_clone(struct mbuf *m0) |
67 | { |
68 | struct mbuf *m, *mprev; |
69 | struct mbuf *n, *mfirst, *mlast; |
70 | int len, off; |
71 | |
72 | IPSEC_ASSERT(m0 != NULL, ("m_clone: null mbuf" )); |
73 | |
74 | mprev = NULL; |
75 | for (m = m0; m != NULL; m = mprev->m_next) { |
76 | /* |
77 | * Regular mbufs are ignored unless there's a cluster |
78 | * in front of it that we can use to coalesce. We do |
79 | * the latter mainly so later clusters can be coalesced |
80 | * also w/o having to handle them specially (i.e. convert |
81 | * mbuf+cluster -> cluster). This optimization is heavily |
82 | * influenced by the assumption that we're running over |
83 | * Ethernet where MCLBYTES is large enough that the max |
84 | * packet size will permit lots of coalescing into a |
85 | * single cluster. This in turn permits efficient |
86 | * crypto operations, especially when using hardware. |
87 | */ |
88 | if ((m->m_flags & M_EXT) == 0) { |
89 | if (mprev && (mprev->m_flags & M_EXT) && |
90 | m->m_len <= M_TRAILINGSPACE(mprev)) { |
91 | /* XXX: this ignores mbuf types */ |
92 | memcpy(mtod(mprev, char *) + mprev->m_len, |
93 | mtod(m, char *), m->m_len); |
94 | mprev->m_len += m->m_len; |
95 | mprev->m_next = m->m_next; /* unlink from chain */ |
96 | m_free(m); /* reclaim mbuf */ |
97 | IPSEC_STATINC(IPSEC_STAT_MBCOALESCED); |
98 | } else { |
99 | mprev = m; |
100 | } |
101 | continue; |
102 | } |
103 | /* |
104 | * Writable mbufs are left alone (for now). Note |
105 | * that for 4.x systems it's not possible to identify |
106 | * whether or not mbufs with external buffers are |
107 | * writable unless they use clusters. |
108 | */ |
109 | if (M_EXT_WRITABLE(m)) { |
110 | mprev = m; |
111 | continue; |
112 | } |
113 | |
114 | /* |
115 | * Not writable, replace with a copy or coalesce with |
116 | * the previous mbuf if possible (since we have to copy |
117 | * it anyway, we try to reduce the number of mbufs and |
118 | * clusters so that future work is easier). |
119 | */ |
120 | IPSEC_ASSERT(m->m_flags & M_EXT, |
121 | ("m_clone: m_flags 0x%x" , m->m_flags)); |
122 | /* NB: we only coalesce into a cluster or larger */ |
123 | if (mprev != NULL && (mprev->m_flags & M_EXT) && |
124 | m->m_len <= M_TRAILINGSPACE(mprev)) { |
125 | /* XXX: this ignores mbuf types */ |
126 | memcpy(mtod(mprev, char *) + mprev->m_len, |
127 | mtod(m, char *), m->m_len); |
128 | mprev->m_len += m->m_len; |
129 | mprev->m_next = m->m_next; /* unlink from chain */ |
130 | m_free(m); /* reclaim mbuf */ |
131 | IPSEC_STATINC(IPSEC_STAT_CLCOALESCED); |
132 | continue; |
133 | } |
134 | |
135 | /* |
136 | * Allocate new space to hold the copy... |
137 | */ |
138 | /* XXX why can M_PKTHDR be set past the first mbuf? */ |
139 | if (mprev == NULL && (m->m_flags & M_PKTHDR)) { |
140 | /* |
141 | * NB: if a packet header is present we must |
142 | * allocate the mbuf separately from any cluster |
143 | * because M_MOVE_PKTHDR will smash the data |
144 | * pointer and drop the M_EXT marker. |
145 | */ |
146 | MGETHDR(n, M_DONTWAIT, m->m_type); |
147 | if (n == NULL) { |
148 | m_freem(m0); |
149 | return (NULL); |
150 | } |
151 | M_MOVE_PKTHDR(n, m); |
152 | MCLGET(n, M_DONTWAIT); |
153 | if ((n->m_flags & M_EXT) == 0) { |
154 | m_free(n); |
155 | m_freem(m0); |
156 | return (NULL); |
157 | } |
158 | } else { |
159 | n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags); |
160 | if (n == NULL) { |
161 | m_freem(m0); |
162 | return (NULL); |
163 | } |
164 | } |
165 | /* |
166 | * ... and copy the data. We deal with jumbo mbufs |
167 | * (i.e. m_len > MCLBYTES) by splitting them into |
168 | * clusters. We could just malloc a buffer and make |
169 | * it external but too many device drivers don't know |
170 | * how to break up the non-contiguous memory when |
171 | * doing DMA. |
172 | */ |
173 | len = m->m_len; |
174 | off = 0; |
175 | mfirst = n; |
176 | mlast = NULL; |
177 | for (;;) { |
178 | int cc = min(len, MCLBYTES); |
179 | memcpy(mtod(n, char *), mtod(m, char *) + off, cc); |
180 | n->m_len = cc; |
181 | if (mlast != NULL) |
182 | mlast->m_next = n; |
183 | mlast = n; |
184 | IPSEC_STATINC(IPSEC_STAT_CLCOPIED); |
185 | |
186 | len -= cc; |
187 | if (len <= 0) |
188 | break; |
189 | off += cc; |
190 | |
191 | n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags); |
192 | if (n == NULL) { |
193 | m_freem(mfirst); |
194 | m_freem(m0); |
195 | return (NULL); |
196 | } |
197 | } |
198 | n->m_next = m->m_next; |
199 | if (mprev == NULL) |
200 | m0 = mfirst; /* new head of chain */ |
201 | else |
202 | mprev->m_next = mfirst; /* replace old mbuf */ |
203 | m_free(m); /* release old mbuf */ |
204 | mprev = mfirst; |
205 | } |
206 | return (m0); |
207 | } |
208 | |
209 | /* |
210 | * Make space for a new header of length hlen at skip bytes |
211 | * into the packet. When doing this we allocate new mbufs only |
212 | * when absolutely necessary. The mbuf where the new header |
213 | * is to go is returned together with an offset into the mbuf. |
214 | * If NULL is returned then the mbuf chain may have been modified; |
215 | * the caller is assumed to always free the chain. |
216 | */ |
217 | struct mbuf * |
218 | m_makespace(struct mbuf *m0, int skip, int hlen, int *off) |
219 | { |
220 | struct mbuf *m; |
221 | unsigned remain; |
222 | |
223 | IPSEC_ASSERT(m0 != NULL, ("m_dmakespace: null mbuf" )); |
224 | IPSEC_ASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u" , hlen)); |
225 | |
226 | for (m = m0; m && skip > m->m_len; m = m->m_next) |
227 | skip -= m->m_len; |
228 | if (m == NULL) |
229 | return (NULL); |
230 | /* |
231 | * At this point skip is the offset into the mbuf m |
232 | * where the new header should be placed. Figure out |
233 | * if there's space to insert the new header. If so, |
234 | * and copying the remainder makese sense then do so. |
235 | * Otherwise insert a new mbuf in the chain, splitting |
236 | * the contents of m as needed. |
237 | */ |
238 | remain = m->m_len - skip; /* data to move */ |
239 | if (hlen > M_TRAILINGSPACE(m)) { |
240 | struct mbuf *n0, *n, **np; |
241 | int todo, len, done, alloc; |
242 | |
243 | n0 = NULL; |
244 | np = &n0; |
245 | alloc = 0; |
246 | done = 0; |
247 | todo = remain; |
248 | while (todo > 0) { |
249 | if (todo > MHLEN) { |
250 | n = m_getcl(M_DONTWAIT, m->m_type, 0); |
251 | len = MCLBYTES; |
252 | } |
253 | else { |
254 | n = m_get(M_DONTWAIT, m->m_type); |
255 | len = MHLEN; |
256 | } |
257 | if (n == NULL) { |
258 | m_freem(n0); |
259 | return NULL; |
260 | } |
261 | *np = n; |
262 | np = &n->m_next; |
263 | alloc++; |
264 | len = min(todo, len); |
265 | memcpy(n->m_data, mtod(m, char *) + skip + done, len); |
266 | n->m_len = len; |
267 | done += len; |
268 | todo -= len; |
269 | } |
270 | |
271 | if (hlen <= M_TRAILINGSPACE(m) + remain) { |
272 | m->m_len = skip + hlen; |
273 | *off = skip; |
274 | if (n0 != NULL) { |
275 | *np = m->m_next; |
276 | m->m_next = n0; |
277 | } |
278 | } |
279 | else { |
280 | n = m_get(M_DONTWAIT, m->m_type); |
281 | if (n == NULL) { |
282 | m_freem(n0); |
283 | return NULL; |
284 | } |
285 | alloc++; |
286 | |
287 | if ((n->m_next = n0) == NULL) |
288 | np = &n->m_next; |
289 | n0 = n; |
290 | |
291 | *np = m->m_next; |
292 | m->m_next = n0; |
293 | |
294 | n->m_len = hlen; |
295 | m->m_len = skip; |
296 | |
297 | m = n; /* header is at front ... */ |
298 | *off = 0; /* ... of new mbuf */ |
299 | } |
300 | |
301 | IPSEC_STATADD(IPSEC_STAT_MBINSERTED, alloc); |
302 | } else { |
303 | /* |
304 | * Copy the remainder to the back of the mbuf |
305 | * so there's space to write the new header. |
306 | */ |
307 | /* XXX can this be memcpy? does it handle overlap? */ |
308 | ovbcopy(mtod(m, char *) + skip, |
309 | mtod(m, char *) + skip + hlen, remain); |
310 | m->m_len += hlen; |
311 | *off = skip; |
312 | } |
313 | m0->m_pkthdr.len += hlen; /* adjust packet length */ |
314 | return m; |
315 | } |
316 | |
317 | /* |
318 | * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header |
319 | * length is updated, and a pointer to the first byte of the padding |
320 | * (which is guaranteed to be all in one mbuf) is returned. |
321 | */ |
322 | void * |
323 | m_pad(struct mbuf *m, int n) |
324 | { |
325 | register struct mbuf *m0, *m1; |
326 | register int len, pad; |
327 | void *retval; |
328 | |
329 | if (n <= 0) { /* No stupid arguments. */ |
330 | DPRINTF(("m_pad: pad length invalid (%d)\n" , n)); |
331 | m_freem(m); |
332 | return NULL; |
333 | } |
334 | |
335 | len = m->m_pkthdr.len; |
336 | pad = n; |
337 | m0 = m; |
338 | |
339 | while (m0->m_len < len) { |
340 | IPSEC_ASSERT(m0->m_next != NULL, ("m_pad: m0 null, len %u m_len %u" , len, m0->m_len));/*XXX*/ |
341 | len -= m0->m_len; |
342 | m0 = m0->m_next; |
343 | } |
344 | |
345 | if (m0->m_len != len) { |
346 | DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n" , |
347 | m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len)); |
348 | |
349 | m_freem(m); |
350 | return NULL; |
351 | } |
352 | |
353 | /* Check for zero-length trailing mbufs, and find the last one. */ |
354 | for (m1 = m0; m1->m_next; m1 = m1->m_next) { |
355 | if (m1->m_next->m_len != 0) { |
356 | DPRINTF(("m_pad: length mismatch (should be %d " |
357 | "instead of %d)\n" , |
358 | m->m_pkthdr.len, |
359 | m->m_pkthdr.len + m1->m_next->m_len)); |
360 | |
361 | m_freem(m); |
362 | return NULL; |
363 | } |
364 | |
365 | m0 = m1->m_next; |
366 | } |
367 | |
368 | if (pad > M_TRAILINGSPACE(m0)) { |
369 | /* Add an mbuf to the chain. */ |
370 | MGET(m1, M_DONTWAIT, MT_DATA); |
371 | if (m1 == 0) { |
372 | m_freem(m0); |
373 | DPRINTF(("m_pad: unable to get extra mbuf\n" )); |
374 | return NULL; |
375 | } |
376 | |
377 | m0->m_next = m1; |
378 | m0 = m1; |
379 | m0->m_len = 0; |
380 | } |
381 | |
382 | retval = m0->m_data + m0->m_len; |
383 | m0->m_len += pad; |
384 | m->m_pkthdr.len += pad; |
385 | |
386 | return retval; |
387 | } |
388 | |
389 | /* |
390 | * Remove hlen data at offset skip in the packet. This is used by |
391 | * the protocols strip protocol headers and associated data (e.g. IV, |
392 | * authenticator) on input. |
393 | */ |
394 | int |
395 | m_striphdr(struct mbuf *m, int skip, int hlen) |
396 | { |
397 | struct mbuf *m1; |
398 | int roff; |
399 | |
400 | /* Find beginning of header */ |
401 | m1 = m_getptr(m, skip, &roff); |
402 | if (m1 == NULL) |
403 | return (EINVAL); |
404 | |
405 | /* Remove the header and associated data from the mbuf. */ |
406 | if (roff == 0) { |
407 | /* The header was at the beginning of the mbuf */ |
408 | IPSEC_STATINC(IPSEC_STAT_INPUT_FRONT); |
409 | m_adj(m1, hlen); |
410 | if ((m1->m_flags & M_PKTHDR) == 0) |
411 | m->m_pkthdr.len -= hlen; |
412 | } else if (roff + hlen >= m1->m_len) { |
413 | struct mbuf *mo; |
414 | |
415 | /* |
416 | * Part or all of the header is at the end of this mbuf, |
417 | * so first let's remove the remainder of the header from |
418 | * the beginning of the remainder of the mbuf chain, if any. |
419 | */ |
420 | IPSEC_STATINC(IPSEC_STAT_INPUT_END); |
421 | if (roff + hlen > m1->m_len) { |
422 | /* Adjust the next mbuf by the remainder */ |
423 | m_adj(m1->m_next, roff + hlen - m1->m_len); |
424 | |
425 | /* The second mbuf is guaranteed not to have a pkthdr... */ |
426 | m->m_pkthdr.len -= (roff + hlen - m1->m_len); |
427 | } |
428 | |
429 | /* Now, let's unlink the mbuf chain for a second...*/ |
430 | mo = m1->m_next; |
431 | m1->m_next = NULL; |
432 | |
433 | /* ...and trim the end of the first part of the chain...sick */ |
434 | m_adj(m1, -(m1->m_len - roff)); |
435 | if ((m1->m_flags & M_PKTHDR) == 0) |
436 | m->m_pkthdr.len -= (m1->m_len - roff); |
437 | |
438 | /* Finally, let's relink */ |
439 | m1->m_next = mo; |
440 | } else { |
441 | /* |
442 | * The header lies in the "middle" of the mbuf; copy |
443 | * the remainder of the mbuf down over the header. |
444 | */ |
445 | IPSEC_STATINC(IPSEC_STAT_INPUT_MIDDLE); |
446 | ovbcopy(mtod(m1, u_char *) + roff + hlen, |
447 | mtod(m1, u_char *) + roff, |
448 | m1->m_len - (roff + hlen)); |
449 | m1->m_len -= hlen; |
450 | m->m_pkthdr.len -= hlen; |
451 | } |
452 | return (0); |
453 | } |
454 | |
455 | /* |
456 | * Diagnostic routine to check mbuf alignment as required by the |
457 | * crypto device drivers (that use DMA). |
458 | */ |
459 | void |
460 | m_checkalignment(const char* where, struct mbuf *m0, int off, int len) |
461 | { |
462 | int roff; |
463 | struct mbuf *m = m_getptr(m0, off, &roff); |
464 | void *addr; |
465 | |
466 | if (m == NULL) |
467 | return; |
468 | printf("%s (off %u len %u): " , where, off, len); |
469 | addr = mtod(m, char *) + roff; |
470 | do { |
471 | int mlen; |
472 | |
473 | if (((uintptr_t) addr) & 3) { |
474 | printf("addr misaligned %p," , addr); |
475 | break; |
476 | } |
477 | mlen = m->m_len; |
478 | if (mlen > len) |
479 | mlen = len; |
480 | len -= mlen; |
481 | if (len && (mlen & 3)) { |
482 | printf("len mismatch %u," , mlen); |
483 | break; |
484 | } |
485 | m = m->m_next; |
486 | addr = m ? mtod(m, void *) : NULL; |
487 | } while (m && len > 0); |
488 | for (m = m0; m; m = m->m_next) |
489 | printf(" [%p:%u]" , mtod(m, void *), m->m_len); |
490 | printf("\n" ); |
491 | } |
492 | |