Print this page
8634 epoll fails to wake on certain edge-triggered conditions
8635 epoll should not emit POLLNVAL
8636 recursive epoll should emit EPOLLRDNORM
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Toomas Soome <tsoome@me.com>
Reviewed by: Igor Kozhukhov <igor@dilos.org>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/io/bpf/bpf.c
+++ new/usr/src/uts/common/io/bpf/bpf.c
1 1 /* $NetBSD: bpf.c,v 1.143 2009/03/11 05:55:22 mrg Exp $ */
2 2
3 3 /*
4 4 * Copyright (c) 1990, 1991, 1993
5 5 * The Regents of the University of California. All rights reserved.
6 6 *
7 7 * This code is derived from the Stanford/CMU enet packet filter,
8 8 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
9 9 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
10 10 * Berkeley Laboratory.
11 11 *
12 12 * Redistribution and use in source and binary forms, with or without
13 13 * modification, are permitted provided that the following conditions
14 14 * are met:
15 15 * 1. Redistributions of source code must retain the above copyright
16 16 * notice, this list of conditions and the following disclaimer.
17 17 * 2. Redistributions in binary form must reproduce the above copyright
18 18 * notice, this list of conditions and the following disclaimer in the
19 19 * documentation and/or other materials provided with the distribution.
20 20 * 3. Neither the name of the University nor the names of its contributors
21 21 * may be used to endorse or promote products derived from this software
22 22 * without specific prior written permission.
23 23 *
24 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
↓ open down ↓ |
32 lines elided |
↑ open up ↑ |
33 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 34 * SUCH DAMAGE.
35 35 *
36 36 * @(#)bpf.c 8.4 (Berkeley) 1/9/95
37 37 * static char rcsid[] =
38 38 * "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp ";
39 39 */
40 40 /*
41 41 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
42 42 * Use is subject to license terms.
43 + * Copyright 2017 Joyent, Inc.
43 44 */
44 45
45 46 /*
46 47 * The BPF implements the following access controls for zones attempting
47 48 * to read and write data. Writing of data requires that the net_rawaccess
48 49 * privilege is held whilst reading data requires either net_rawaccess or
49 50 * net_observerability.
50 51 *
51 52 * | Shared | Exclusive | Global
52 53 * -----------------------------+--------+------------+------------+
53 54 * DLT_IPNET in local zone | Read | Read | Read |
54 55 * -----------------------------+--------+------------+------------+
55 56 * Raw access to local zone NIC | None | Read/Write | Read/Write |
56 57 * -----------------------------+--------+------------+------------+
57 58 * Raw access to all NICs | None | None | Read/Write |
58 59 * -----------------------------+--------+------------+------------+
59 60 *
60 61 * The BPF driver is written as a cloning driver: each call to bpfopen()
61 62 * allocates a new minor number. This provides BPF with a 1:1 relationship
62 63 * between open's and close's. There is some amount of "descriptor state"
63 64 * that is kept per open. Pointers to this data are stored in a hash table
64 65 * (bpf_hash) that is index'd by the minor device number for each open file.
65 66 */
66 67 #include <sys/param.h>
67 68 #include <sys/systm.h>
68 69 #include <sys/time.h>
69 70 #include <sys/ioctl.h>
70 71 #include <sys/queue.h>
71 72 #include <sys/filio.h>
72 73 #include <sys/policy.h>
73 74 #include <sys/cmn_err.h>
74 75 #include <sys/uio.h>
75 76 #include <sys/file.h>
76 77 #include <sys/sysmacros.h>
77 78 #include <sys/zone.h>
78 79
79 80 #include <sys/socket.h>
80 81 #include <sys/errno.h>
81 82 #include <sys/poll.h>
82 83 #include <sys/dlpi.h>
83 84 #include <sys/neti.h>
84 85
85 86 #include <net/if.h>
86 87
87 88 #include <net/bpf.h>
88 89 #include <net/bpfdesc.h>
89 90 #include <net/dlt.h>
90 91
91 92 #include <netinet/in.h>
92 93 #include <sys/mac.h>
93 94 #include <sys/mac_client.h>
94 95 #include <sys/mac_impl.h>
95 96 #include <sys/time_std_impl.h>
96 97 #include <sys/hook.h>
97 98 #include <sys/hook_event.h>
98 99
99 100
100 101 #define mtod(_v, _t) (_t)((_v)->b_rptr)
101 102 #define M_LEN(_m) ((_m)->b_wptr - (_m)->b_rptr)
102 103
103 104 /*
104 105 * 4096 is too small for FDDI frames. 8192 is too small for gigabit Ethernet
105 106 * jumbos (circa 9k), ATM, or Intel gig/10gig ethernet jumbos (16k).
106 107 */
107 108 #define BPF_BUFSIZE (32 * 1024)
108 109
109 110 typedef void *(*cp_fn_t)(void *, const void *, size_t);
110 111
111 112 /*
112 113 * The default read buffer size, and limit for BIOCSBLEN.
113 114 */
114 115 int bpf_bufsize = BPF_BUFSIZE;
115 116 int bpf_maxbufsize = (16 * 1024 * 1024);
116 117 static mod_hash_t *bpf_hash = NULL;
117 118
118 119 /*
119 120 * Use a mutex to avoid a race condition between gathering the stats/peers
120 121 * and opening/closing the device.
121 122 */
122 123 static kcondvar_t bpf_dlt_waiter;
123 124 static kmutex_t bpf_mtx;
124 125 static bpf_kstats_t ks_stats;
125 126 static bpf_kstats_t bpf_kstats = {
126 127 { "readWait", KSTAT_DATA_UINT64 },
127 128 { "writeOk", KSTAT_DATA_UINT64 },
128 129 { "writeError", KSTAT_DATA_UINT64 },
129 130 { "receive", KSTAT_DATA_UINT64 },
130 131 { "captured", KSTAT_DATA_UINT64 },
131 132 { "dropped", KSTAT_DATA_UINT64 },
132 133 };
133 134 static kstat_t *bpf_ksp;
134 135
135 136 /*
136 137 * bpf_list is a list of the BPF descriptors currently open
137 138 */
138 139 LIST_HEAD(, bpf_d) bpf_list;
139 140
140 141 static int bpf_allocbufs(struct bpf_d *);
141 142 static void bpf_clear_timeout(struct bpf_d *);
142 143 static void bpf_deliver(struct bpf_d *, cp_fn_t,
143 144 void *, uint_t, uint_t, boolean_t);
144 145 static void bpf_freed(struct bpf_d *);
145 146 static int bpf_ifname(struct bpf_d *d, char *, int);
146 147 static void *bpf_mcpy(void *, const void *, size_t);
147 148 static int bpf_attachd(struct bpf_d *, const char *, int);
148 149 static void bpf_detachd(struct bpf_d *);
149 150 static int bpf_setif(struct bpf_d *, char *, int);
150 151 static void bpf_timed_out(void *);
151 152 static inline void
152 153 bpf_wakeup(struct bpf_d *);
153 154 static void catchpacket(struct bpf_d *, uchar_t *, uint_t, uint_t,
154 155 cp_fn_t, struct timeval *);
155 156 static void reset_d(struct bpf_d *);
156 157 static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
157 158 static int bpf_setdlt(struct bpf_d *, void *);
158 159 static void bpf_dev_add(struct bpf_d *);
159 160 static struct bpf_d *bpf_dev_find(minor_t);
160 161 static struct bpf_d *bpf_dev_get(minor_t);
161 162 static void bpf_dev_remove(struct bpf_d *);
162 163
163 164 static int
164 165 bpf_movein(struct uio *uio, int linktype, int mtu, mblk_t **mp)
165 166 {
166 167 mblk_t *m;
167 168 int error;
168 169 int len;
169 170 int hlen;
170 171 int align;
171 172
172 173 /*
173 174 * Build a sockaddr based on the data link layer type.
174 175 * We do this at this level because the ethernet header
175 176 * is copied directly into the data field of the sockaddr.
176 177 * In the case of SLIP, there is no header and the packet
177 178 * is forwarded as is.
178 179 * Also, we are careful to leave room at the front of the mbuf
179 180 * for the link level header.
180 181 */
181 182 switch (linktype) {
182 183
183 184 case DLT_EN10MB:
184 185 hlen = sizeof (struct ether_header);
185 186 break;
186 187
187 188 case DLT_FDDI:
188 189 hlen = 16;
189 190 break;
190 191
191 192 case DLT_NULL:
192 193 hlen = 0;
193 194 break;
194 195
195 196 case DLT_IPOIB:
196 197 hlen = 44;
197 198 break;
198 199
199 200 default:
200 201 return (EIO);
201 202 }
202 203
203 204 align = 4 - (hlen & 3);
204 205
205 206 len = uio->uio_resid;
206 207 /*
207 208 * If there aren't enough bytes for a link level header or the
208 209 * packet length exceeds the interface mtu, return an error.
209 210 */
210 211 if (len < hlen || len - hlen > mtu)
211 212 return (EMSGSIZE);
212 213
213 214 m = allocb(len + align, BPRI_MED);
214 215 if (m == NULL) {
215 216 error = ENOBUFS;
216 217 goto bad;
217 218 }
218 219
219 220 /* Insure the data is properly aligned */
220 221 if (align > 0)
221 222 m->b_rptr += align;
222 223 m->b_wptr = m->b_rptr + len;
223 224
224 225 error = uiomove(mtod(m, void *), len, UIO_WRITE, uio);
225 226 if (error)
226 227 goto bad;
227 228 *mp = m;
228 229 return (0);
229 230
230 231 bad:
231 232 if (m != NULL)
232 233 freemsg(m);
233 234 return (error);
234 235 }
235 236
236 237
237 238 /*
238 239 * Attach file to the bpf interface, i.e. make d listen on bp.
239 240 */
240 241 static int
241 242 bpf_attachd(struct bpf_d *d, const char *ifname, int dlt)
242 243 {
243 244 bpf_provider_list_t *bp;
244 245 bpf_provider_t *bpr;
245 246 boolean_t zonematch;
246 247 zoneid_t niczone;
247 248 uintptr_t mcip;
248 249 zoneid_t zone;
249 250 uint_t nicdlt;
250 251 uintptr_t mh;
251 252 int hdrlen;
252 253 int error;
253 254
254 255 ASSERT(d->bd_bif == NULL);
255 256 ASSERT(d->bd_mcip == NULL);
256 257 zone = d->bd_zone;
257 258 zonematch = B_TRUE;
258 259 again:
259 260 mh = 0;
260 261 mcip = 0;
261 262 LIST_FOREACH(bp, &bpf_providers, bpl_next) {
262 263 bpr = bp->bpl_what;
263 264 error = MBPF_OPEN(bpr, ifname, &mh, zone);
264 265 if (error != 0)
265 266 goto next;
266 267 error = MBPF_CLIENT_OPEN(bpr, mh, &mcip);
267 268 if (error != 0)
268 269 goto next;
269 270 error = MBPF_GET_DLT(bpr, mh, &nicdlt);
270 271 if (error != 0)
271 272 goto next;
272 273
273 274 nicdlt = bpf_dl_to_dlt(nicdlt);
274 275 if (dlt != -1 && dlt != nicdlt) {
275 276 error = ENOENT;
276 277 goto next;
277 278 }
278 279
279 280 error = MBPF_GET_ZONE(bpr, mh, &niczone);
280 281 if (error != 0)
281 282 goto next;
282 283
283 284 DTRACE_PROBE4(bpf__attach, struct bpf_provider_s *, bpr,
284 285 uintptr_t, mh, int, nicdlt, zoneid_t, niczone);
285 286
286 287 if (zonematch && niczone != zone) {
287 288 error = ENOENT;
288 289 goto next;
289 290 }
290 291 break;
291 292 next:
292 293 if (mcip != 0) {
293 294 MBPF_CLIENT_CLOSE(bpr, mcip);
294 295 mcip = 0;
295 296 }
296 297 if (mh != NULL) {
297 298 MBPF_CLOSE(bpr, mh);
298 299 mh = 0;
299 300 }
300 301 }
301 302 if (error != 0) {
302 303 if (zonematch && (zone == GLOBAL_ZONEID)) {
303 304 /*
304 305 * If we failed to do an exact match for the global
305 306 * zone using the global zoneid, try again in case
306 307 * the network interface is owned by a local zone.
307 308 */
308 309 zonematch = B_FALSE;
309 310 goto again;
310 311 }
311 312 return (error);
312 313 }
313 314
314 315 d->bd_mac = *bpr;
315 316 d->bd_mcip = mcip;
316 317 d->bd_bif = mh;
317 318 d->bd_dlt = nicdlt;
318 319 hdrlen = bpf_dl_hdrsize(nicdlt);
319 320 d->bd_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
320 321
321 322 (void) strlcpy(d->bd_ifname, MBPF_CLIENT_NAME(&d->bd_mac, mcip),
322 323 sizeof (d->bd_ifname));
323 324
324 325 (void) MBPF_GET_LINKID(&d->bd_mac, d->bd_ifname, &d->bd_linkid,
325 326 zone);
326 327 (void) MBPF_PROMISC_ADD(&d->bd_mac, d->bd_mcip, 0, d,
327 328 &d->bd_promisc_handle, d->bd_promisc_flags);
328 329 return (0);
329 330 }
330 331
331 332 /*
332 333 * Detach a file from its interface.
333 334 */
334 335 static void
335 336 bpf_detachd(struct bpf_d *d)
336 337 {
337 338 uintptr_t mph;
338 339 uintptr_t mch;
339 340 uintptr_t mh;
340 341
341 342 ASSERT(d->bd_inuse == -1);
342 343 mch = d->bd_mcip;
343 344 d->bd_mcip = 0;
344 345 mh = d->bd_bif;
345 346 d->bd_bif = 0;
346 347
347 348 /*
348 349 * Check if this descriptor had requested promiscuous mode.
349 350 * If so, turn it off. There's no need to take any action
350 351 * here, that is done when MBPF_PROMISC_REMOVE is used;
351 352 * bd_promisc is just a local flag to stop promiscuous mode
352 353 * from being set more than once.
353 354 */
354 355 if (d->bd_promisc)
355 356 d->bd_promisc = 0;
356 357
357 358 /*
358 359 * Take device out of "promiscuous" mode. Since we were able to
359 360 * enter "promiscuous" mode, we should be able to turn it off.
360 361 * Note, this field stores a pointer used to support both
361 362 * promiscuous and non-promiscuous callbacks for packets.
362 363 */
363 364 mph = d->bd_promisc_handle;
364 365 d->bd_promisc_handle = 0;
365 366
366 367 /*
367 368 * The lock has to be dropped here because mac_promisc_remove may
368 369 * need to wait for mac_promisc_dispatch, which has called into
369 370 * bpf and catchpacket is waiting for bd_lock...
370 371 * i.e mac_promisc_remove() needs to be called with none of the
371 372 * locks held that are part of the bpf_mtap() call path.
372 373 */
373 374 mutex_exit(&d->bd_lock);
374 375 if (mph != 0)
375 376 MBPF_PROMISC_REMOVE(&d->bd_mac, mph);
376 377
377 378 if (mch != 0)
378 379 MBPF_CLIENT_CLOSE(&d->bd_mac, mch);
379 380
380 381 if (mh != 0)
381 382 MBPF_CLOSE(&d->bd_mac, mh);
382 383
383 384 /*
384 385 * Because this function is called with bd_lock held, so it must
385 386 * exit with it held.
386 387 */
387 388 mutex_enter(&d->bd_lock);
388 389 *d->bd_ifname = '\0';
389 390 (void) memset(&d->bd_mac, 0, sizeof (d->bd_mac));
390 391 }
391 392
392 393
393 394 /*
394 395 * bpfilterattach() is called at load time.
395 396 */
396 397 int
397 398 bpfilterattach(void)
398 399 {
399 400
400 401 bpf_hash = mod_hash_create_idhash("bpf_dev_tab", 31,
401 402 mod_hash_null_keydtor);
402 403 if (bpf_hash == NULL)
403 404 return (ENOMEM);
404 405
405 406 (void) memcpy(&ks_stats, &bpf_kstats, sizeof (bpf_kstats));
406 407
407 408 bpf_ksp = kstat_create("bpf", 0, "global", "misc",
408 409 KSTAT_TYPE_NAMED, sizeof (bpf_kstats) / sizeof (kstat_named_t),
409 410 KSTAT_FLAG_VIRTUAL);
410 411 if (bpf_ksp != NULL) {
411 412 bpf_ksp->ks_data = &ks_stats;
412 413 kstat_install(bpf_ksp);
413 414 } else {
414 415 mod_hash_destroy_idhash(bpf_hash);
415 416 bpf_hash = NULL;
416 417 return (EEXIST);
417 418 }
418 419
419 420 cv_init(&bpf_dlt_waiter, NULL, CV_DRIVER, NULL);
420 421 mutex_init(&bpf_mtx, NULL, MUTEX_DRIVER, NULL);
421 422
422 423 LIST_INIT(&bpf_list);
423 424
424 425 return (0);
425 426 }
426 427
427 428
428 429 /*
429 430 * bpfilterdetach() is called at unload time.
430 431 */
431 432 int
432 433 bpfilterdetach(void)
433 434 {
434 435
435 436 if (bpf_ksp != NULL) {
436 437 kstat_delete(bpf_ksp);
437 438 bpf_ksp = NULL;
438 439 }
439 440
440 441 mod_hash_destroy_idhash(bpf_hash);
441 442 bpf_hash = NULL;
442 443
443 444 cv_destroy(&bpf_dlt_waiter);
444 445 mutex_destroy(&bpf_mtx);
445 446
446 447 return (0);
447 448 }
448 449
449 450 /*
450 451 * Open ethernet device. Clones.
451 452 */
452 453 /* ARGSUSED */
453 454 int
454 455 bpfopen(dev_t *devp, int flag, int mode, cred_t *cred)
455 456 {
456 457 struct bpf_d *d;
457 458 uint_t dmin;
458 459
459 460 /*
460 461 * The security policy described at the top of this file is
461 462 * enforced here.
462 463 */
463 464 if ((flag & FWRITE) != 0) {
464 465 if (secpolicy_net_rawaccess(cred) != 0)
465 466 return (EACCES);
466 467 }
467 468
468 469 if ((flag & FREAD) != 0) {
469 470 if ((secpolicy_net_observability(cred) != 0) &&
470 471 (secpolicy_net_rawaccess(cred) != 0))
471 472 return (EACCES);
472 473 }
473 474
474 475 if ((flag & (FWRITE|FREAD)) == 0)
475 476 return (ENXIO);
476 477
477 478 /*
478 479 * A structure is allocated per open file in BPF to store settings
479 480 * such as buffer capture size, provide private buffers, etc.
480 481 */
481 482 d = (struct bpf_d *)kmem_zalloc(sizeof (*d), KM_SLEEP);
482 483 d->bd_bufsize = bpf_bufsize;
483 484 d->bd_fmode = flag;
484 485 d->bd_zone = crgetzoneid(cred);
485 486 d->bd_seesent = 1;
486 487 d->bd_promisc_flags = MAC_PROMISC_FLAGS_NO_PHYS|
487 488 MAC_PROMISC_FLAGS_NO_COPY;
488 489 mutex_init(&d->bd_lock, NULL, MUTEX_DRIVER, NULL);
489 490 cv_init(&d->bd_wait, NULL, CV_DRIVER, NULL);
490 491
491 492 mutex_enter(&bpf_mtx);
492 493 /*
493 494 * Find an unused minor number. Obviously this is an O(n) algorithm
494 495 * and doesn't scale particularly well, so if there are large numbers
495 496 * of open file descriptors happening in real use, this design may
496 497 * need to be revisited.
497 498 */
498 499 for (dmin = 0; dmin < L_MAXMIN; dmin++)
499 500 if (bpf_dev_find(dmin) == NULL)
500 501 break;
501 502 if (dmin == L_MAXMIN) {
502 503 mutex_exit(&bpf_mtx);
503 504 kmem_free(d, sizeof (*d));
504 505 return (ENXIO);
505 506 }
506 507 d->bd_dev = dmin;
507 508 LIST_INSERT_HEAD(&bpf_list, d, bd_list);
508 509 bpf_dev_add(d);
509 510 mutex_exit(&bpf_mtx);
510 511
511 512 *devp = makedevice(getmajor(*devp), dmin);
512 513
513 514 return (0);
514 515 }
515 516
516 517 /*
517 518 * Close the descriptor by detaching it from its interface,
518 519 * deallocating its buffers, and marking it free.
519 520 *
520 521 * Because we only allow a device to be opened once, there is always a
521 522 * 1 to 1 relationship between opens and closes supporting this function.
522 523 */
523 524 /* ARGSUSED */
524 525 int
525 526 bpfclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
526 527 {
527 528 struct bpf_d *d = bpf_dev_get(getminor(dev));
528 529
529 530 mutex_enter(&d->bd_lock);
530 531
531 532 while (d->bd_inuse != 0) {
532 533 d->bd_waiting++;
533 534 if (cv_wait_sig(&d->bd_wait, &d->bd_lock) <= 0) {
534 535 d->bd_waiting--;
535 536 mutex_exit(&d->bd_lock);
536 537 return (EINTR);
537 538 }
538 539 d->bd_waiting--;
539 540 }
540 541
541 542 d->bd_inuse = -1;
542 543 if (d->bd_state == BPF_WAITING)
543 544 bpf_clear_timeout(d);
544 545 d->bd_state = BPF_IDLE;
545 546 if (d->bd_bif)
546 547 bpf_detachd(d);
547 548 mutex_exit(&d->bd_lock);
548 549
549 550 mutex_enter(&bpf_mtx);
550 551 LIST_REMOVE(d, bd_list);
551 552 bpf_dev_remove(d);
552 553 mutex_exit(&bpf_mtx);
553 554
554 555 mutex_enter(&d->bd_lock);
555 556 mutex_destroy(&d->bd_lock);
556 557 cv_destroy(&d->bd_wait);
557 558
558 559 bpf_freed(d);
559 560 kmem_free(d, sizeof (*d));
560 561
561 562 return (0);
562 563 }
563 564
/*
 * Rotate the packet buffers in descriptor d. Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 *
 * Wrapped in do { } while (0) so the multi-statement macro expands as a
 * single statement and is safe inside unbraced if/else bodies.
 */
#define	ROTATE_BUFFERS(d)			\
	do {					\
		(d)->bd_hbuf = (d)->bd_sbuf;	\
		(d)->bd_hlen = (d)->bd_slen;	\
		(d)->bd_sbuf = (d)->bd_fbuf;	\
		(d)->bd_slen = 0;		\
		(d)->bd_fbuf = 0;		\
	} while (0)
575 576 /*
576 577 * bpfread - read next chunk of packets from buffers
577 578 */
578 579 /* ARGSUSED */
579 580 int
580 581 bpfread(dev_t dev, struct uio *uio, cred_t *cred)
581 582 {
582 583 struct bpf_d *d = bpf_dev_get(getminor(dev));
583 584 int timed_out;
584 585 ulong_t delay;
585 586 int error;
586 587
587 588 if ((d->bd_fmode & FREAD) == 0)
588 589 return (EBADF);
589 590
590 591 /*
591 592 * Restrict application to use a buffer the same size as
592 593 * the kernel buffers.
593 594 */
594 595 if (uio->uio_resid != d->bd_bufsize)
595 596 return (EINVAL);
596 597
597 598 mutex_enter(&d->bd_lock);
598 599 if (d->bd_state == BPF_WAITING)
599 600 bpf_clear_timeout(d);
600 601 timed_out = (d->bd_state == BPF_TIMED_OUT);
601 602 d->bd_state = BPF_IDLE;
602 603 /*
603 604 * If the hold buffer is empty, then do a timed sleep, which
604 605 * ends when the timeout expires or when enough packets
605 606 * have arrived to fill the store buffer.
606 607 */
607 608 while (d->bd_hbuf == 0) {
608 609 if (d->bd_nonblock) {
609 610 if (d->bd_slen == 0) {
610 611 mutex_exit(&d->bd_lock);
611 612 return (EWOULDBLOCK);
612 613 }
613 614 ROTATE_BUFFERS(d);
614 615 break;
615 616 }
616 617
617 618 if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
618 619 /*
619 620 * A packet(s) either arrived since the previous
620 621 * read or arrived while we were asleep.
621 622 * Rotate the buffers and return what's here.
622 623 */
623 624 ROTATE_BUFFERS(d);
624 625 break;
625 626 }
626 627 ks_stats.kp_read_wait.value.ui64++;
627 628 delay = ddi_get_lbolt() + d->bd_rtout;
628 629 error = cv_timedwait_sig(&d->bd_wait, &d->bd_lock, delay);
629 630 if (error == 0) {
630 631 mutex_exit(&d->bd_lock);
631 632 return (EINTR);
632 633 }
633 634 if (error == -1) {
634 635 /*
635 636 * On a timeout, return what's in the buffer,
636 637 * which may be nothing. If there is something
637 638 * in the store buffer, we can rotate the buffers.
638 639 */
639 640 if (d->bd_hbuf)
640 641 /*
641 642 * We filled up the buffer in between
642 643 * getting the timeout and arriving
643 644 * here, so we don't need to rotate.
644 645 */
645 646 break;
646 647
647 648 if (d->bd_slen == 0) {
648 649 mutex_exit(&d->bd_lock);
649 650 return (0);
650 651 }
651 652 ROTATE_BUFFERS(d);
652 653 }
653 654 }
654 655 /*
655 656 * At this point, we know we have something in the hold slot.
656 657 */
657 658 mutex_exit(&d->bd_lock);
658 659
659 660 /*
660 661 * Move data from hold buffer into user space.
661 662 * We know the entire buffer is transferred since
662 663 * we checked above that the read buffer is bpf_bufsize bytes.
663 664 */
664 665 error = uiomove(d->bd_hbuf, d->bd_hlen, UIO_READ, uio);
665 666
666 667 mutex_enter(&d->bd_lock);
667 668 d->bd_fbuf = d->bd_hbuf;
668 669 d->bd_hbuf = 0;
669 670 d->bd_hlen = 0;
670 671 done:
671 672 mutex_exit(&d->bd_lock);
672 673 return (error);
673 674 }
674 675
675 676
676 677 /*
677 678 * If there are processes sleeping on this descriptor, wake them up.
678 679 * NOTE: the lock for bd_wait is bd_lock and is held by bpf_deliver,
679 680 * so there is no code here grabbing it.
680 681 */
681 682 static inline void
682 683 bpf_wakeup(struct bpf_d *d)
683 684 {
684 685 cv_signal(&d->bd_wait);
685 686 }
686 687
687 688 static void
688 689 bpf_timed_out(void *arg)
689 690 {
690 691 struct bpf_d *d = arg;
691 692
692 693 mutex_enter(&d->bd_lock);
693 694 if (d->bd_state == BPF_WAITING) {
694 695 d->bd_state = BPF_TIMED_OUT;
695 696 if (d->bd_slen != 0)
696 697 cv_signal(&d->bd_wait);
697 698 }
698 699 mutex_exit(&d->bd_lock);
699 700 }
700 701
701 702
702 703 /* ARGSUSED */
703 704 int
704 705 bpfwrite(dev_t dev, struct uio *uio, cred_t *cred)
705 706 {
706 707 struct bpf_d *d = bpf_dev_get(getminor(dev));
707 708 uintptr_t mch;
708 709 uint_t mtu;
709 710 mblk_t *m;
710 711 int error;
711 712 int dlt;
712 713
713 714 if ((d->bd_fmode & FWRITE) == 0)
714 715 return (EBADF);
715 716
716 717 mutex_enter(&d->bd_lock);
717 718 if (d->bd_bif == 0 || d->bd_mcip == 0 || d->bd_bif == 0) {
718 719 mutex_exit(&d->bd_lock);
719 720 return (EINTR);
720 721 }
721 722
722 723 if (uio->uio_resid == 0) {
723 724 mutex_exit(&d->bd_lock);
724 725 return (0);
725 726 }
726 727
727 728 while (d->bd_inuse < 0) {
728 729 d->bd_waiting++;
729 730 if (cv_wait_sig(&d->bd_wait, &d->bd_lock) <= 0) {
730 731 d->bd_waiting--;
731 732 mutex_exit(&d->bd_lock);
732 733 return (EINTR);
733 734 }
734 735 d->bd_waiting--;
735 736 }
736 737
737 738 mutex_exit(&d->bd_lock);
738 739
739 740 dlt = d->bd_dlt;
740 741 mch = d->bd_mcip;
741 742 MBPF_SDU_GET(&d->bd_mac, d->bd_bif, &mtu);
742 743 d->bd_inuse++;
743 744
744 745 m = NULL;
745 746 if (dlt == DLT_IPNET) {
746 747 error = EIO;
747 748 goto done;
748 749 }
749 750
750 751 error = bpf_movein(uio, dlt, mtu, &m);
751 752 if (error)
752 753 goto done;
753 754
754 755 DTRACE_PROBE4(bpf__tx, struct bpf_d *, d, int, dlt,
755 756 uint_t, mtu, mblk_t *, m);
756 757
757 758 if (M_LEN(m) > mtu) {
758 759 error = EMSGSIZE;
759 760 goto done;
760 761 }
761 762
762 763 error = MBPF_TX(&d->bd_mac, mch, m);
763 764 /*
764 765 * The "tx" action here is required to consume the mblk_t.
765 766 */
766 767 m = NULL;
767 768
768 769 done:
769 770 if (error == 0)
770 771 ks_stats.kp_write_ok.value.ui64++;
771 772 else
772 773 ks_stats.kp_write_error.value.ui64++;
773 774 if (m != NULL)
774 775 freemsg(m);
775 776
776 777 mutex_enter(&d->bd_lock);
777 778 d->bd_inuse--;
778 779 if ((d->bd_inuse == 0) && (d->bd_waiting != 0))
779 780 cv_signal(&d->bd_wait);
780 781 mutex_exit(&d->bd_lock);
781 782
782 783 /*
783 784 * The driver frees the mbuf.
784 785 */
785 786 return (error);
786 787 }
787 788
788 789
789 790 /*
790 791 * Reset a descriptor by flushing its packet buffer and clearing the
791 792 * receive and drop counts. Should be called at splnet.
792 793 */
793 794 static void
794 795 reset_d(struct bpf_d *d)
795 796 {
796 797 if (d->bd_hbuf) {
797 798 /* Free the hold buffer. */
798 799 d->bd_fbuf = d->bd_hbuf;
799 800 d->bd_hbuf = 0;
800 801 }
801 802 d->bd_slen = 0;
802 803 d->bd_hlen = 0;
803 804 d->bd_rcount = 0;
804 805 d->bd_dcount = 0;
805 806 d->bd_ccount = 0;
806 807 }
807 808
808 809 /*
809 810 * FIONREAD Check for read packet available.
810 811 * BIOCGBLEN Get buffer len [for read()].
811 812 * BIOCSETF Set ethernet read filter.
812 813 * BIOCFLUSH Flush read packet buffer.
813 814 * BIOCPROMISC Put interface into promiscuous mode.
814 815 * BIOCGDLT Get link layer type.
815 816 * BIOCGETIF Get interface name.
816 817 * BIOCSETIF Set interface.
817 818 * BIOCSRTIMEOUT Set read timeout.
818 819 * BIOCGRTIMEOUT Get read timeout.
819 820 * BIOCGSTATS Get packet stats.
820 821 * BIOCIMMEDIATE Set immediate mode.
821 822 * BIOCVERSION Get filter language version.
822 823 * BIOCGHDRCMPLT Get "header already complete" flag.
823 824 * BIOCSHDRCMPLT Set "header already complete" flag.
824 825 */
825 826 /* ARGSUSED */
826 827 int
827 828 bpfioctl(dev_t dev, int cmd, intptr_t addr, int mode, cred_t *cred, int *rval)
828 829 {
829 830 struct bpf_d *d = bpf_dev_get(getminor(dev));
830 831 struct bpf_program prog;
831 832 struct lifreq lifreq;
832 833 struct ifreq ifreq;
833 834 int error = 0;
834 835 uint_t size;
835 836
836 837 /*
837 838 * Refresh the PID associated with this bpf file.
838 839 */
839 840 mutex_enter(&d->bd_lock);
840 841 if (d->bd_state == BPF_WAITING)
841 842 bpf_clear_timeout(d);
842 843 d->bd_state = BPF_IDLE;
843 844 mutex_exit(&d->bd_lock);
844 845
845 846 switch (cmd) {
846 847
847 848 default:
848 849 error = EINVAL;
849 850 break;
850 851
851 852 /*
852 853 * Check for read packet available.
853 854 */
854 855 case FIONREAD:
855 856 {
856 857 int n;
857 858
858 859 mutex_enter(&d->bd_lock);
859 860 n = d->bd_slen;
860 861 if (d->bd_hbuf)
861 862 n += d->bd_hlen;
862 863 mutex_exit(&d->bd_lock);
863 864
864 865 *(int *)addr = n;
865 866 break;
866 867 }
867 868
868 869 /*
869 870 * Get buffer len [for read()].
870 871 */
871 872 case BIOCGBLEN:
872 873 error = copyout(&d->bd_bufsize, (void *)addr,
873 874 sizeof (d->bd_bufsize));
874 875 break;
875 876
876 877 /*
877 878 * Set buffer length.
878 879 */
879 880 case BIOCSBLEN:
880 881 if (copyin((void *)addr, &size, sizeof (size)) != 0) {
881 882 error = EFAULT;
882 883 break;
883 884 }
884 885
885 886 mutex_enter(&d->bd_lock);
886 887 if (d->bd_bif != 0) {
887 888 error = EINVAL;
888 889 } else {
889 890 if (size > bpf_maxbufsize)
890 891 size = bpf_maxbufsize;
891 892 else if (size < BPF_MINBUFSIZE)
892 893 size = BPF_MINBUFSIZE;
893 894
894 895 d->bd_bufsize = size;
895 896 }
896 897 mutex_exit(&d->bd_lock);
897 898
898 899 if (error == 0)
899 900 error = copyout(&size, (void *)addr, sizeof (size));
900 901 break;
901 902
902 903 /*
903 904 * Set link layer read filter.
904 905 */
905 906 case BIOCSETF:
906 907 if (ddi_copyin((void *)addr, &prog, sizeof (prog), mode)) {
907 908 error = EFAULT;
908 909 break;
909 910 }
910 911 error = bpf_setf(d, &prog);
911 912 break;
912 913
913 914 /*
914 915 * Flush read packet buffer.
915 916 */
916 917 case BIOCFLUSH:
917 918 mutex_enter(&d->bd_lock);
918 919 reset_d(d);
919 920 mutex_exit(&d->bd_lock);
920 921 break;
921 922
922 923 /*
923 924 * Put interface into promiscuous mode.
924 925 * This is a one-way ioctl, it is not used to turn promiscuous
925 926 * mode off.
926 927 */
927 928 case BIOCPROMISC:
928 929 if (d->bd_bif == 0) {
929 930 /*
930 931 * No interface attached yet.
931 932 */
932 933 error = EINVAL;
933 934 break;
934 935 }
935 936 mutex_enter(&d->bd_lock);
936 937 if (d->bd_promisc == 0) {
937 938
938 939 if (d->bd_promisc_handle) {
939 940 uintptr_t mph;
940 941
941 942 mph = d->bd_promisc_handle;
942 943 d->bd_promisc_handle = 0;
943 944
944 945 mutex_exit(&d->bd_lock);
945 946 MBPF_PROMISC_REMOVE(&d->bd_mac, mph);
946 947 mutex_enter(&d->bd_lock);
947 948 }
948 949
949 950 d->bd_promisc_flags = MAC_PROMISC_FLAGS_NO_COPY;
950 951 error = MBPF_PROMISC_ADD(&d->bd_mac,
951 952 d->bd_mcip, MAC_CLIENT_PROMISC_ALL, d,
952 953 &d->bd_promisc_handle, d->bd_promisc_flags);
953 954 if (error == 0)
954 955 d->bd_promisc = 1;
955 956 }
956 957 mutex_exit(&d->bd_lock);
957 958 break;
958 959
959 960 /*
960 961 * Get device parameters.
961 962 */
962 963 case BIOCGDLT:
963 964 if (d->bd_bif == 0)
964 965 error = EINVAL;
965 966 else
966 967 error = copyout(&d->bd_dlt, (void *)addr,
967 968 sizeof (d->bd_dlt));
968 969 break;
969 970
970 971 /*
971 972 * Get a list of supported device parameters.
972 973 */
973 974 case BIOCGDLTLIST:
974 975 if (d->bd_bif == 0) {
975 976 error = EINVAL;
976 977 } else {
977 978 struct bpf_dltlist list;
978 979
979 980 if (copyin((void *)addr, &list, sizeof (list)) != 0) {
980 981 error = EFAULT;
981 982 break;
982 983 }
983 984 error = bpf_getdltlist(d, &list);
984 985 if ((error == 0) &&
985 986 copyout(&list, (void *)addr, sizeof (list)) != 0)
986 987 error = EFAULT;
987 988 }
988 989 break;
989 990
990 991 /*
991 992 * Set device parameters.
992 993 */
993 994 case BIOCSDLT:
994 995 error = bpf_setdlt(d, (void *)addr);
995 996 break;
996 997
997 998 /*
998 999 * Get interface name.
999 1000 */
1000 1001 case BIOCGETIF:
1001 1002 if (copyin((void *)addr, &ifreq, sizeof (ifreq)) != 0) {
1002 1003 error = EFAULT;
1003 1004 break;
1004 1005 }
1005 1006 error = bpf_ifname(d, ifreq.ifr_name, sizeof (ifreq.ifr_name));
1006 1007 if ((error == 0) &&
1007 1008 copyout(&ifreq, (void *)addr, sizeof (ifreq)) != 0) {
1008 1009 error = EFAULT;
1009 1010 break;
1010 1011 }
1011 1012 break;
1012 1013
1013 1014 /*
1014 1015 * Set interface.
1015 1016 */
1016 1017 case BIOCSETIF:
1017 1018 if (copyin((void *)addr, &ifreq, sizeof (ifreq)) != 0) {
1018 1019 error = EFAULT;
1019 1020 break;
1020 1021 }
1021 1022 error = bpf_setif(d, ifreq.ifr_name, sizeof (ifreq.ifr_name));
1022 1023 break;
1023 1024
1024 1025 /*
1025 1026 * Get interface name.
1026 1027 */
1027 1028 case BIOCGETLIF:
1028 1029 if (copyin((void *)addr, &lifreq, sizeof (lifreq)) != 0) {
1029 1030 error = EFAULT;
1030 1031 break;
1031 1032 }
1032 1033 error = bpf_ifname(d, lifreq.lifr_name,
1033 1034 sizeof (lifreq.lifr_name));
1034 1035 if ((error == 0) &&
1035 1036 copyout(&lifreq, (void *)addr, sizeof (lifreq)) != 0) {
1036 1037 error = EFAULT;
1037 1038 break;
1038 1039 }
1039 1040 break;
1040 1041
1041 1042 /*
1042 1043 * Set interface.
1043 1044 */
1044 1045 case BIOCSETLIF:
1045 1046 if (copyin((void *)addr, &lifreq, sizeof (lifreq)) != 0) {
1046 1047 error = EFAULT;
1047 1048 break;
1048 1049 }
1049 1050 error = bpf_setif(d, lifreq.lifr_name,
1050 1051 sizeof (lifreq.lifr_name));
1051 1052 break;
1052 1053
1053 1054 #ifdef _SYSCALL32_IMPL
1054 1055 /*
1055 1056 * Set read timeout.
1056 1057 */
1057 1058 case BIOCSRTIMEOUT32:
1058 1059 {
1059 1060 struct timeval32 tv;
1060 1061
1061 1062 if (copyin((void *)addr, &tv, sizeof (tv)) != 0) {
1062 1063 error = EFAULT;
1063 1064 break;
1064 1065 }
1065 1066
1066 1067 /* Convert the timeout in microseconds to ticks */
1067 1068 d->bd_rtout = drv_usectohz(tv.tv_sec * 1000000 +
1068 1069 tv.tv_usec);
1069 1070 if ((d->bd_rtout == 0) && (tv.tv_usec != 0))
1070 1071 d->bd_rtout = 1;
1071 1072 break;
1072 1073 }
1073 1074
1074 1075 /*
1075 1076 * Get read timeout.
1076 1077 */
1077 1078 case BIOCGRTIMEOUT32:
1078 1079 {
1079 1080 struct timeval32 tv;
1080 1081 clock_t ticks;
1081 1082
1082 1083 ticks = drv_hztousec(d->bd_rtout);
1083 1084 tv.tv_sec = ticks / 1000000;
1084 1085 tv.tv_usec = ticks - (tv.tv_sec * 1000000);
1085 1086 error = copyout(&tv, (void *)addr, sizeof (tv));
1086 1087 break;
1087 1088 }
1088 1089
1089 1090 /*
1090 1091 * Get a list of supported device parameters.
1091 1092 */
1092 1093 case BIOCGDLTLIST32:
1093 1094 if (d->bd_bif == 0) {
1094 1095 error = EINVAL;
1095 1096 } else {
1096 1097 struct bpf_dltlist32 lst32;
1097 1098 struct bpf_dltlist list;
1098 1099
1099 1100 if (copyin((void *)addr, &lst32, sizeof (lst32)) != 0) {
1100 1101 error = EFAULT;
1101 1102 break;
1102 1103 }
1103 1104
1104 1105 list.bfl_len = lst32.bfl_len;
1105 1106 list.bfl_list = (void *)(uint64_t)lst32.bfl_list;
1106 1107 error = bpf_getdltlist(d, &list);
1107 1108 if (error == 0) {
1108 1109 lst32.bfl_len = list.bfl_len;
1109 1110
1110 1111 if (copyout(&lst32, (void *)addr,
1111 1112 sizeof (lst32)) != 0)
1112 1113 error = EFAULT;
1113 1114 }
1114 1115 }
1115 1116 break;
1116 1117
1117 1118 /*
1118 1119 * Set link layer read filter.
1119 1120 */
1120 1121 case BIOCSETF32: {
1121 1122 struct bpf_program32 prog32;
1122 1123
1123 1124 if (ddi_copyin((void *)addr, &prog32, sizeof (prog), mode)) {
1124 1125 error = EFAULT;
1125 1126 break;
1126 1127 }
1127 1128 prog.bf_len = prog32.bf_len;
1128 1129 prog.bf_insns = (void *)(uint64_t)prog32.bf_insns;
1129 1130 error = bpf_setf(d, &prog);
1130 1131 break;
1131 1132 }
1132 1133 #endif
1133 1134
1134 1135 /*
1135 1136 * Set read timeout.
1136 1137 */
1137 1138 case BIOCSRTIMEOUT:
1138 1139 {
1139 1140 struct timeval tv;
1140 1141
1141 1142 if (copyin((void *)addr, &tv, sizeof (tv)) != 0) {
1142 1143 error = EFAULT;
1143 1144 break;
1144 1145 }
1145 1146
1146 1147 /* Convert the timeout in microseconds to ticks */
1147 1148 d->bd_rtout = drv_usectohz(tv.tv_sec * 1000000 +
1148 1149 tv.tv_usec);
1149 1150 if ((d->bd_rtout == 0) && (tv.tv_usec != 0))
1150 1151 d->bd_rtout = 1;
1151 1152 break;
1152 1153 }
1153 1154
1154 1155 /*
1155 1156 * Get read timeout.
1156 1157 */
1157 1158 case BIOCGRTIMEOUT:
1158 1159 {
1159 1160 struct timeval tv;
1160 1161 clock_t ticks;
1161 1162
1162 1163 ticks = drv_hztousec(d->bd_rtout);
1163 1164 tv.tv_sec = ticks / 1000000;
1164 1165 tv.tv_usec = ticks - (tv.tv_sec * 1000000);
1165 1166 if (copyout(&tv, (void *)addr, sizeof (tv)) != 0)
1166 1167 error = EFAULT;
1167 1168 break;
1168 1169 }
1169 1170
1170 1171 /*
1171 1172 * Get packet stats.
1172 1173 */
1173 1174 case BIOCGSTATS:
1174 1175 {
1175 1176 struct bpf_stat bs;
1176 1177
1177 1178 bs.bs_recv = d->bd_rcount;
1178 1179 bs.bs_drop = d->bd_dcount;
1179 1180 bs.bs_capt = d->bd_ccount;
1180 1181 if (copyout(&bs, (void *)addr, sizeof (bs)) != 0)
1181 1182 error = EFAULT;
1182 1183 break;
1183 1184 }
1184 1185
1185 1186 /*
1186 1187 * Set immediate mode.
1187 1188 */
1188 1189 case BIOCIMMEDIATE:
1189 1190 if (copyin((void *)addr, &d->bd_immediate,
1190 1191 sizeof (d->bd_immediate)) != 0)
1191 1192 error = EFAULT;
1192 1193 break;
1193 1194
1194 1195 case BIOCVERSION:
1195 1196 {
1196 1197 struct bpf_version bv;
1197 1198
1198 1199 bv.bv_major = BPF_MAJOR_VERSION;
1199 1200 bv.bv_minor = BPF_MINOR_VERSION;
1200 1201 if (copyout(&bv, (void *)addr, sizeof (bv)) != 0)
1201 1202 error = EFAULT;
1202 1203 break;
1203 1204 }
1204 1205
1205 1206 case BIOCGHDRCMPLT: /* get "header already complete" flag */
1206 1207 if (copyout(&d->bd_hdrcmplt, (void *)addr,
1207 1208 sizeof (d->bd_hdrcmplt)) != 0)
1208 1209 error = EFAULT;
1209 1210 break;
1210 1211
1211 1212 case BIOCSHDRCMPLT: /* set "header already complete" flag */
1212 1213 if (copyin((void *)addr, &d->bd_hdrcmplt,
1213 1214 sizeof (d->bd_hdrcmplt)) != 0)
1214 1215 error = EFAULT;
1215 1216 break;
1216 1217
1217 1218 /*
1218 1219 * Get "see sent packets" flag
1219 1220 */
1220 1221 case BIOCGSEESENT:
1221 1222 if (copyout(&d->bd_seesent, (void *)addr,
1222 1223 sizeof (d->bd_seesent)) != 0)
1223 1224 error = EFAULT;
1224 1225 break;
1225 1226
1226 1227 /*
1227 1228 * Set "see sent" packets flag
1228 1229 */
1229 1230 case BIOCSSEESENT:
1230 1231 if (copyin((void *)addr, &d->bd_seesent,
1231 1232 sizeof (d->bd_seesent)) != 0)
1232 1233 error = EFAULT;
1233 1234 break;
1234 1235
1235 1236 case FIONBIO: /* Non-blocking I/O */
1236 1237 if (copyin((void *)addr, &d->bd_nonblock,
1237 1238 sizeof (d->bd_nonblock)) != 0)
1238 1239 error = EFAULT;
1239 1240 break;
1240 1241 }
1241 1242 return (error);
1242 1243 }
1243 1244
1244 1245 /*
1245 1246 * Set d's packet filter program to fp. If this file already has a filter,
1246 1247 * free it and replace it. If the new filter is "empty" (has a 0 size), then
1247 1248 * the result is to just remove and free the existing filter.
1248 1249 * Returns EINVAL for bogus requests.
1249 1250 */
1250 1251 int
1251 1252 bpf_setf(struct bpf_d *d, struct bpf_program *fp)
1252 1253 {
1253 1254 struct bpf_insn *fcode, *old;
1254 1255 uint_t flen, size;
1255 1256 size_t oldsize;
1256 1257
1257 1258 if (fp->bf_insns == 0) {
1258 1259 if (fp->bf_len != 0)
1259 1260 return (EINVAL);
1260 1261 mutex_enter(&d->bd_lock);
1261 1262 old = d->bd_filter;
1262 1263 oldsize = d->bd_filter_size;
1263 1264 d->bd_filter = 0;
1264 1265 d->bd_filter_size = 0;
1265 1266 reset_d(d);
1266 1267 mutex_exit(&d->bd_lock);
1267 1268 if (old != 0)
1268 1269 kmem_free(old, oldsize);
1269 1270 return (0);
1270 1271 }
1271 1272 flen = fp->bf_len;
1272 1273 if (flen > BPF_MAXINSNS)
1273 1274 return (EINVAL);
1274 1275
1275 1276 size = flen * sizeof (*fp->bf_insns);
1276 1277 fcode = kmem_alloc(size, KM_SLEEP);
1277 1278 if (copyin(fp->bf_insns, fcode, size) != 0)
1278 1279 return (EFAULT);
1279 1280
1280 1281 if (bpf_validate(fcode, (int)flen)) {
1281 1282 mutex_enter(&d->bd_lock);
1282 1283 old = d->bd_filter;
1283 1284 oldsize = d->bd_filter_size;
1284 1285 d->bd_filter = fcode;
1285 1286 d->bd_filter_size = size;
1286 1287 reset_d(d);
1287 1288 mutex_exit(&d->bd_lock);
1288 1289 if (old != 0)
1289 1290 kmem_free(old, oldsize);
1290 1291
1291 1292 return (0);
1292 1293 }
1293 1294 kmem_free(fcode, size);
1294 1295 return (EINVAL);
1295 1296 }
1296 1297
/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifname.
 * Return an errno or 0.
 */
static int
bpf_setif(struct bpf_d *d, char *ifname, int namesize)
{
	int unit_seen;
	int error = 0;
	char *cp;
	int i;

	/*
	 * Make sure the provided name has a unit number, and default
	 * it to '0' if not specified.
	 * XXX This is ugly ... do this differently?
	 */
	unit_seen = 0;
	cp = ifname;
	cp[namesize - 1] = '\0';	/* sanity */
	while (*cp++)
		if (*cp >= '0' && *cp <= '9')
			unit_seen = 1;
	if (!unit_seen) {
		/* Make sure to leave room for the '\0'. */
		for (i = 0; i < (namesize - 1); ++i) {
			if ((ifname[i] >= 'a' && ifname[i] <= 'z') ||
			    (ifname[i] >= 'A' && ifname[i] <= 'Z'))
				continue;
			/* First non-alphabetic slot gets the unit digit. */
			ifname[i] = '0';
		}
	}

	/*
	 * Make sure that only one call to this function happens at a time
	 * and that we're not interleaving a read/write
	 */
	mutex_enter(&d->bd_lock);
	while (d->bd_inuse != 0) {
		/* Someone else holds the descriptor; sleep until signalled. */
		d->bd_waiting++;
		if (cv_wait_sig(&d->bd_wait, &d->bd_lock) <= 0) {
			/* Interrupted by a signal: undo and bail out. */
			d->bd_waiting--;
			mutex_exit(&d->bd_lock);
			return (EINTR);
		}
		d->bd_waiting--;
	}
	/* -1 marks exclusive (writer-style) use of this descriptor. */
	d->bd_inuse = -1;
	mutex_exit(&d->bd_lock);

	/* Allocate the store/free packet buffers on first attach. */
	if (d->bd_sbuf == 0)
		error = bpf_allocbufs(d);

	if (error == 0) {
		mutex_enter(&d->bd_lock);
		if (d->bd_bif)
			/*
			 * Detach if attached to something else.
			 */
			bpf_detachd(d);

		error = bpf_attachd(d, ifname, -1);
		reset_d(d);
		/* Release exclusive use and wake any waiters. */
		d->bd_inuse = 0;
		if (d->bd_waiting != 0)
			cv_signal(&d->bd_wait);
		mutex_exit(&d->bd_lock);
		return (error);
	}

	/*
	 * Buffer allocation failed: release exclusive use before falling
	 * through to the provider tickle below.
	 * NOTE(review): the allocation error (ENOBUFS) is discarded here and
	 * replaced by bpf_provider_tickle()'s return value — confirm this is
	 * intentional.
	 */
	mutex_enter(&d->bd_lock);
	d->bd_inuse = 0;
	if (d->bd_waiting != 0)
		cv_signal(&d->bd_wait);
	mutex_exit(&d->bd_lock);

	/*
	 * Try tickle the mac layer into attaching the device...
	 */
	return (bpf_provider_tickle(ifname, d->bd_zone));
}
1379 1380
1380 1381 /*
1381 1382 * Copy the interface name to the ifreq.
1382 1383 */
1383 1384 static int
1384 1385 bpf_ifname(struct bpf_d *d, char *buffer, int bufsize)
1385 1386 {
1386 1387
1387 1388 mutex_enter(&d->bd_lock);
1388 1389 if (d->bd_bif == NULL) {
|
↓ open down ↓ |
1336 lines elided |
↑ open up ↑ |
1389 1390 mutex_exit(&d->bd_lock);
1390 1391 return (EINVAL);
1391 1392 }
1392 1393
1393 1394 (void) strlcpy(buffer, d->bd_ifname, bufsize);
1394 1395 mutex_exit(&d->bd_lock);
1395 1396
1396 1397 return (0);
1397 1398 }
1398 1399
/* ARGSUSED */
int
bpfchpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	struct bpf_d *d = bpf_dev_get(getminor(dev));

	/*
	 * Until this driver is modified to issue proper pollwakeup() calls on
	 * its pollhead, edge-triggered polling is not allowed.
	 */
	if (events & POLLET) {
		return (EPERM);
	}

	if (events & (POLLIN | POLLRDNORM)) {
		/*
		 * An imitation of the FIONREAD ioctl code: readable when the
		 * hold buffer has data, or (in immediate mode / after a read
		 * timeout) when the store buffer does.
		 */
		mutex_enter(&d->bd_lock);
		if (d->bd_hlen != 0 ||
		    ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
		    d->bd_slen != 0)) {
			*reventsp |= events & (POLLIN | POLLRDNORM);
		} else {
			/*
			 * Until the bpf driver has been updated to include
			 * adequate pollwakeup() logic, no pollhead will be
			 * emitted here, preventing the resource from being
			 * cached by poll()/devpoll/epoll.
			 */
			*reventsp = 0;
			/* Start the read timeout if necessary */
			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
				bpf_clear_timeout(d);
				/*
				 * Only allow the timeout to be set once.
				 */
				if (d->bd_callout == 0)
					d->bd_callout = timeout(bpf_timed_out,
					    d, d->bd_rtout);
				d->bd_state = BPF_WAITING;
			}
		}
		mutex_exit(&d->bd_lock);
	}

	return (0);
}
1443 1449
1444 1450 /*
1445 1451 * Copy data from an mblk_t chain into a buffer. This works for ipnet
1446 1452 * because the dl_ipnetinfo_t is placed in an mblk_t that leads the
1447 1453 * packet itself.
1448 1454 */
1449 1455 static void *
1450 1456 bpf_mcpy(void *dst_arg, const void *src_arg, size_t len)
1451 1457 {
1452 1458 const mblk_t *m;
1453 1459 uint_t count;
1454 1460 uchar_t *dst;
1455 1461
1456 1462 m = src_arg;
1457 1463 dst = dst_arg;
1458 1464 while (len > 0) {
1459 1465 if (m == NULL)
1460 1466 panic("bpf_mcpy");
1461 1467 count = (uint_t)min(M_LEN(m), len);
1462 1468 (void) memcpy(dst, mtod(m, const void *), count);
1463 1469 m = m->b_cont;
1464 1470 dst += count;
1465 1471 len -= count;
1466 1472 }
1467 1473 return (dst_arg);
1468 1474 }
1469 1475
/*
 * Dispatch a packet to all the listeners on interface bp.
 *
 * marg    pointer to the packet, either a data buffer or an mbuf chain
 * buflen  buffer length, if marg is a data buffer
 * cpfn    a function that can copy marg into the listener's buffer
 * pktlen  length of the packet
 * issent  boolean indicating whether the packet was sent or received
 */
static inline void
bpf_deliver(struct bpf_d *d, cp_fn_t cpfn, void *marg, uint_t pktlen,
    uint_t buflen, boolean_t issent)
{
	struct timeval tv;
	uint_t slen;

	/* Skip sent packets unless the listener asked to see them. */
	if (!d->bd_seesent && issent)
		return;

	/*
	 * Accuracy of the packet counters in BPF is vital so it
	 * is important to protect even the outer ones.
	 */
	mutex_enter(&d->bd_lock);
	/* slen is the snapshot length returned by the filter (0 = reject). */
	slen = bpf_filter(d->bd_filter, marg, pktlen, buflen);
	DTRACE_PROBE5(bpf__packet, struct bpf_if *, d->bd_bif,
	    struct bpf_d *, d, void *, marg, uint_t, pktlen, uint_t, slen);
	d->bd_rcount++;
	ks_stats.kp_receive.value.ui64++;
	if (slen != 0) {
		/* Timestamp the match and store it for the reader. */
		uniqtime(&tv);
		catchpacket(d, marg, pktlen, slen, cpfn, &tv);
	}
	mutex_exit(&d->bd_lock);
}
1505 1511
1506 1512 /*
1507 1513 * Incoming linkage from device drivers.
1508 1514 */
1509 1515 /* ARGSUSED */
1510 1516 void
1511 1517 bpf_mtap(void *arg, mac_resource_handle_t mrh, mblk_t *m, boolean_t issent)
1512 1518 {
1513 1519 cp_fn_t cpfn;
1514 1520 struct bpf_d *d = arg;
1515 1521 uint_t pktlen, buflen;
1516 1522 void *marg;
1517 1523
1518 1524 pktlen = msgdsize(m);
1519 1525
1520 1526 if (pktlen == M_LEN(m)) {
1521 1527 cpfn = (cp_fn_t)memcpy;
1522 1528 marg = mtod(m, void *);
1523 1529 buflen = pktlen;
1524 1530 } else {
1525 1531 cpfn = bpf_mcpy;
1526 1532 marg = m;
1527 1533 buflen = 0;
1528 1534 }
1529 1535
1530 1536 bpf_deliver(d, cpfn, marg, pktlen, buflen, issent);
1531 1537 }
1532 1538
1533 1539 /*
1534 1540 * Incoming linkage from ipnet.
1535 1541 * In ipnet, there is only one event, NH_OBSERVE, that delivers packets
1536 1542 * from all network interfaces. Thus the tap function needs to apply a
1537 1543 * filter using the interface index/id to immitate snoop'ing on just the
1538 1544 * specified interface.
1539 1545 */
1540 1546 /* ARGSUSED */
1541 1547 void
1542 1548 bpf_itap(void *arg, mblk_t *m, boolean_t issent, uint_t length)
1543 1549 {
1544 1550 hook_pkt_observe_t *hdr;
1545 1551 struct bpf_d *d = arg;
1546 1552
1547 1553 hdr = (hook_pkt_observe_t *)m->b_rptr;
1548 1554 if (ntohl(hdr->hpo_ifindex) != d->bd_linkid)
1549 1555 return;
1550 1556 bpf_deliver(d, bpf_mcpy, m, length, 0, issent);
1551 1557
1552 1558 }
1553 1559
/*
 * Move the packet data from interface memory (pkt) into the
 * store buffer. Return 1 if it's time to wakeup a listener (buffer full),
 * otherwise 0. "copy" is the routine called to do the actual data
 * transfer. memcpy is passed in to copy contiguous chunks, while
 * bpf_mcpy is passed in to copy mbuf chains. In the latter case,
 * pkt is really an mbuf.
 *
 * Called with bd_lock held (see bpf_deliver()).
 */
static void
catchpacket(struct bpf_d *d, uchar_t *pkt, uint_t pktlen, uint_t snaplen,
    cp_fn_t cpfn, struct timeval *tv)
{
	struct bpf_hdr *hp;
	int totlen, curlen;
	int hdrlen = d->bd_hdrlen;
	int do_wakeup = 0;

	++d->bd_ccount;
	ks_stats.kp_capture.value.ui64++;
	/*
	 * Figure out how many bytes to move.  If the packet is
	 * greater or equal to the snapshot length, transfer that
	 * much.  Otherwise, transfer the whole packet (unless
	 * we hit the buffer size limit).
	 */
	totlen = hdrlen + min(snaplen, pktlen);
	if (totlen > d->bd_bufsize)
		totlen = d->bd_bufsize;

	/*
	 * Round up the end of the previous packet to the next longword.
	 */
	curlen = BPF_WORDALIGN(d->bd_slen);
	if (curlen + totlen > d->bd_bufsize) {
		/*
		 * This packet will overflow the storage buffer.
		 * Rotate the buffers if we can, then wakeup any
		 * pending reads.
		 */
		if (d->bd_fbuf == 0) {
			/*
			 * We haven't completed the previous read yet,
			 * so drop the packet.
			 */
			++d->bd_dcount;
			ks_stats.kp_dropped.value.ui64++;
			return;
		}
		/* Store buffer becomes the hold buffer; free becomes store. */
		ROTATE_BUFFERS(d);
		do_wakeup = 1;
		curlen = 0;
	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
		/*
		 * Immediate mode is set, or the read timeout has
		 * already expired during a select call.  A packet
		 * arrived, so the reader should be woken up.
		 */
		do_wakeup = 1;
	}

	/*
	 * Append the bpf header to the existing buffer before we add
	 * on the actual packet data.
	 */
	hp = (struct bpf_hdr *)((char *)d->bd_sbuf + curlen);
	hp->bh_tstamp.tv_sec = tv->tv_sec;
	hp->bh_tstamp.tv_usec = tv->tv_usec;
	hp->bh_datalen = pktlen;
	hp->bh_hdrlen = (uint16_t)hdrlen;
	/*
	 * Copy the packet data into the store buffer and update its length.
	 */
	(*cpfn)((uchar_t *)hp + hdrlen, pkt,
	    (hp->bh_caplen = totlen - hdrlen));
	d->bd_slen = curlen + totlen;

	/*
	 * Call bpf_wakeup after bd_slen has been updated.
	 */
	if (do_wakeup)
		bpf_wakeup(d);
}
1636 1642
1637 1643 /*
1638 1644 * Initialize all nonzero fields of a descriptor.
1639 1645 */
1640 1646 static int
1641 1647 bpf_allocbufs(struct bpf_d *d)
1642 1648 {
1643 1649
1644 1650 d->bd_fbuf = kmem_zalloc(d->bd_bufsize, KM_NOSLEEP);
1645 1651 if (!d->bd_fbuf)
1646 1652 return (ENOBUFS);
1647 1653 d->bd_sbuf = kmem_zalloc(d->bd_bufsize, KM_NOSLEEP);
1648 1654 if (!d->bd_sbuf) {
1649 1655 kmem_free(d->bd_fbuf, d->bd_bufsize);
1650 1656 return (ENOBUFS);
1651 1657 }
1652 1658 d->bd_slen = 0;
1653 1659 d->bd_hlen = 0;
1654 1660 return (0);
1655 1661 }
1656 1662
1657 1663 /*
1658 1664 * Free buffers currently in use by a descriptor.
1659 1665 * Called on close.
1660 1666 */
1661 1667 static void
1662 1668 bpf_freed(struct bpf_d *d)
1663 1669 {
1664 1670 /*
1665 1671 * At this point the descriptor has been detached from its
1666 1672 * interface and it yet hasn't been marked free.
1667 1673 */
1668 1674 if (d->bd_sbuf != 0) {
1669 1675 kmem_free(d->bd_sbuf, d->bd_bufsize);
1670 1676 if (d->bd_hbuf != 0)
1671 1677 kmem_free(d->bd_hbuf, d->bd_bufsize);
1672 1678 if (d->bd_fbuf != 0)
1673 1679 kmem_free(d->bd_fbuf, d->bd_bufsize);
1674 1680 }
1675 1681 if (d->bd_filter)
1676 1682 kmem_free(d->bd_filter, d->bd_filter_size);
1677 1683 }
1678 1684
1679 1685 /*
1680 1686 * Get a list of available data link type of the interface.
1681 1687 */
1682 1688 static int
1683 1689 bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *listp)
1684 1690 {
1685 1691 bpf_provider_list_t *bp;
1686 1692 bpf_provider_t *bpr;
1687 1693 zoneid_t zoneid;
1688 1694 uintptr_t mcip;
1689 1695 uint_t nicdlt;
1690 1696 uintptr_t mh;
1691 1697 int error;
1692 1698 int n;
1693 1699
1694 1700 n = 0;
1695 1701 mh = 0;
1696 1702 mcip = 0;
1697 1703 error = 0;
1698 1704 mutex_enter(&d->bd_lock);
1699 1705 LIST_FOREACH(bp, &bpf_providers, bpl_next) {
1700 1706 bpr = bp->bpl_what;
1701 1707 error = MBPF_OPEN(bpr, d->bd_ifname, &mh, d->bd_zone);
1702 1708 if (error != 0)
1703 1709 goto next;
1704 1710 error = MBPF_CLIENT_OPEN(bpr, mh, &mcip);
1705 1711 if (error != 0)
1706 1712 goto next;
1707 1713 error = MBPF_GET_ZONE(bpr, mh, &zoneid);
1708 1714 if (error != 0)
1709 1715 goto next;
1710 1716 if (d->bd_zone != GLOBAL_ZONEID &&
1711 1717 d->bd_zone != zoneid)
1712 1718 goto next;
1713 1719 error = MBPF_GET_DLT(bpr, mh, &nicdlt);
1714 1720 if (error != 0)
1715 1721 goto next;
1716 1722 nicdlt = bpf_dl_to_dlt(nicdlt);
1717 1723 if (listp->bfl_list != NULL) {
1718 1724 if (n >= listp->bfl_len) {
1719 1725 MBPF_CLIENT_CLOSE(bpr, mcip);
1720 1726 MBPF_CLOSE(bpr, mh);
1721 1727 break;
1722 1728 }
1723 1729 /*
1724 1730 * Bumping of bd_inuse ensures the structure does not
1725 1731 * disappear while the copyout runs and allows the for
1726 1732 * loop to be continued.
1727 1733 */
1728 1734 d->bd_inuse++;
1729 1735 mutex_exit(&d->bd_lock);
1730 1736 if (copyout(&nicdlt,
1731 1737 listp->bfl_list + n, sizeof (uint_t)) != 0)
1732 1738 error = EFAULT;
1733 1739 mutex_enter(&d->bd_lock);
1734 1740 if (error != 0)
1735 1741 break;
1736 1742 d->bd_inuse--;
1737 1743 }
1738 1744 n++;
1739 1745 next:
1740 1746 if (mcip != 0) {
1741 1747 MBPF_CLIENT_CLOSE(bpr, mcip);
1742 1748 mcip = 0;
1743 1749 }
1744 1750 if (mh != 0) {
1745 1751 MBPF_CLOSE(bpr, mh);
1746 1752 mh = 0;
1747 1753 }
1748 1754 }
1749 1755 mutex_exit(&d->bd_lock);
1750 1756
1751 1757 /*
1752 1758 * It is quite possible that one or more provider to BPF may not
1753 1759 * know about a link name whlist others do. In that case, so long
1754 1760 * as we have one success, do not declare an error unless it was
1755 1761 * an EFAULT as this indicates a problem that needs to be reported.
1756 1762 */
1757 1763 if ((error != EFAULT) && (n > 0))
1758 1764 error = 0;
1759 1765
1760 1766 listp->bfl_len = n;
1761 1767 return (error);
1762 1768 }
1763 1769
/*
 * Set the data link type of a BPF instance.
 * Copies the requested DLT in from userland, checks zone permissions and
 * then re-attaches the descriptor to the same interface with the new DLT.
 */
static int
bpf_setdlt(struct bpf_d *d, void *addr)
{
	char ifname[LIFNAMSIZ+1];
	zoneid_t niczone;
	int error;
	int dlt;

	if (copyin(addr, &dlt, sizeof (dlt)) != 0)
		return (EFAULT);

	mutex_enter(&d->bd_lock);

	if (d->bd_bif == 0) {			/* Interface not set */
		mutex_exit(&d->bd_lock);
		return (EINVAL);
	}
	if (d->bd_dlt == dlt) {	/* NULL-op */
		mutex_exit(&d->bd_lock);
		return (0);
	}

	error = MBPF_GET_ZONE(&d->bd_mac, d->bd_bif, &niczone);
	if (error != 0) {
		mutex_exit(&d->bd_lock);
		return (error);
	}

	/*
	 * See the matrix at the top of the file for the permissions table
	 * enforced by this driver: only DLT_IPNET may cross zones for a
	 * non-global-zone opener.
	 */
	if ((d->bd_zone != GLOBAL_ZONEID) && (dlt != DLT_IPNET) &&
	    (niczone != d->bd_zone)) {
		mutex_exit(&d->bd_lock);
		return (EINVAL);
	}

	/*
	 * Re-attach to the same link with the new DLT.  bd_inuse = -1
	 * marks the descriptor as exclusively busy for the duration.
	 */
	(void) strlcpy(ifname, d->bd_ifname, sizeof (ifname));
	d->bd_inuse = -1;
	bpf_detachd(d);
	error = bpf_attachd(d, ifname, dlt);
	reset_d(d);
	d->bd_inuse = 0;

	mutex_exit(&d->bd_lock);
	return (error);
}
1815 1821
/*
 * bpf_clear_timeout is called with the bd_lock mutex held, providing it
 * with the necessary protection to retrieve and modify bd_callout but it
 * does not hold the lock for its entire duration... see below...
 */
static void
bpf_clear_timeout(struct bpf_d *d)
{
	timeout_id_t tid = d->bd_callout;
	d->bd_callout = 0;
	/* Hold the descriptor across the window where bd_lock is dropped. */
	d->bd_inuse++;

	/*
	 * If the timeout has fired and is waiting on bd_lock, we could
	 * deadlock here because untimeout blocks if the handler is running
	 * and would wait for bpf_timed_out to finish, which in turn is
	 * waiting on bd_lock.  So drop the lock around the untimeout call.
	 */
	if (tid != 0) {
		mutex_exit(&d->bd_lock);
		(void) untimeout(tid);
		mutex_enter(&d->bd_lock);
	}

	d->bd_inuse--;
}
1841 1847
1842 1848 /*
1843 1849 * As a cloning device driver, BPF needs to keep track of which device
1844 1850 * numbers are in use and which ones are not. A hash table, indexed by
1845 1851 * the minor device number, is used to store the pointers to the
1846 1852 * individual descriptors that are allocated in bpfopen().
1847 1853 * The functions below present the interface for that hash table to
1848 1854 * the rest of the driver.
1849 1855 */
1850 1856 static struct bpf_d *
1851 1857 bpf_dev_find(minor_t minor)
1852 1858 {
1853 1859 struct bpf_d *d = NULL;
1854 1860
1855 1861 (void) mod_hash_find(bpf_hash, (mod_hash_key_t)(uintptr_t)minor,
1856 1862 (mod_hash_val_t *)&d);
1857 1863
1858 1864 return (d);
1859 1865 }
1860 1866
1861 1867 static void
1862 1868 bpf_dev_add(struct bpf_d *d)
1863 1869 {
1864 1870 (void) mod_hash_insert(bpf_hash, (mod_hash_key_t)(uintptr_t)d->bd_dev,
1865 1871 (mod_hash_val_t)d);
1866 1872 }
1867 1873
1868 1874 static void
1869 1875 bpf_dev_remove(struct bpf_d *d)
1870 1876 {
1871 1877 struct bpf_d *stor;
1872 1878
1873 1879 (void) mod_hash_remove(bpf_hash, (mod_hash_key_t)(uintptr_t)d->bd_dev,
1874 1880 (mod_hash_val_t *)&stor);
1875 1881 ASSERT(stor == d);
1876 1882 }
1877 1883
1878 1884 /*
1879 1885 * bpf_def_get should only ever be called for a minor number that exists,
1880 1886 * thus there should always be a pointer in the hash table that corresponds
1881 1887 * to it.
1882 1888 */
1883 1889 static struct bpf_d *
1884 1890 bpf_dev_get(minor_t minor)
1885 1891 {
1886 1892 struct bpf_d *d = NULL;
1887 1893
1888 1894 (void) mod_hash_find(bpf_hash, (mod_hash_key_t)(uintptr_t)minor,
1889 1895 (mod_hash_val_t *)&d);
1890 1896 ASSERT(d != NULL);
1891 1897
1892 1898 return (d);
1893 1899 }
|
↓ open down ↓ |
458 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX