Print this page
8634 epoll fails to wake on certain edge-triggered conditions
8635 epoll should not emit POLLNVAL
8636 recursive epoll should emit EPOLLRDNORM
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Toomas Soome <tsoome@me.com>
Reviewed by: Igor Kozhukhov <igor@dilos.org>
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/common/fs/fs_subr.c
+++ new/usr/src/uts/common/fs/fs_subr.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
|
↓ open down ↓ |
17 lines elided |
↑ open up ↑ |
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
22 22 /* All Rights Reserved */
23 23
24 24
25 25 /*
26 26 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
27 27 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
28 - * Copyright 2015 Joyent, Inc.
28 + * Copyright 2017 Joyent, Inc.
29 29 */
30 30
31 31 /*
32 32 * Generic vnode operations.
33 33 */
34 34 #include <sys/types.h>
35 35 #include <sys/param.h>
36 36 #include <sys/systm.h>
37 37 #include <sys/errno.h>
38 38 #include <sys/fcntl.h>
39 39 #include <sys/flock.h>
40 40 #include <sys/statvfs.h>
41 41 #include <sys/vfs.h>
42 42 #include <sys/vnode.h>
43 43 #include <sys/proc.h>
44 44 #include <sys/user.h>
45 45 #include <sys/unistd.h>
46 46 #include <sys/cred.h>
47 47 #include <sys/poll.h>
48 48 #include <sys/debug.h>
49 49 #include <sys/cmn_err.h>
50 50 #include <sys/stream.h>
51 51 #include <fs/fs_subr.h>
52 52 #include <fs/fs_reparse.h>
53 53 #include <sys/door.h>
54 54 #include <sys/acl.h>
55 55 #include <sys/share.h>
56 56 #include <sys/file.h>
57 57 #include <sys/kmem.h>
58 58 #include <sys/file.h>
59 59 #include <sys/nbmlock.h>
60 60 #include <acl/acl_common.h>
61 61 #include <sys/pathname.h>
62 62
63 63 static callb_cpr_t *frlock_serialize_blocked(flk_cb_when_t, void *);
64 64
65 65 /*
66 66 * Tunable to limit the number of retry to recover from STALE error.
67 67 */
68 68 int fs_estale_retry = 5;
69 69
70 70 /*
 71  71   * support for the reparse point door upcall
72 72 */
73 73 static door_handle_t reparsed_door;
74 74 static kmutex_t reparsed_door_lock;
75 75
76 76 /*
77 77 * The associated operation is not supported by the file system.
78 78 */
79 79 int
80 80 fs_nosys()
81 81 {
82 82 return (ENOSYS);
83 83 }
84 84
85 85 /*
86 86 * The associated operation is invalid (on this vnode).
87 87 */
88 88 int
89 89 fs_inval()
90 90 {
91 91 return (EINVAL);
92 92 }
93 93
94 94 /*
95 95 * The associated operation is valid only for directories.
96 96 */
97 97 int
98 98 fs_notdir()
99 99 {
100 100 return (ENOTDIR);
101 101 }
102 102
103 103 /*
104 104   * Free the file-system-specific resources. For file systems that
105 105   * do not support forced unmount, this is a no-op function.
106 106 */
107 107
108 108 /*ARGSUSED*/
109 109 void
110 110 fs_freevfs(vfs_t *vfsp)
111 111 {
112 112 }
113 113
114 114 /* ARGSUSED */
115 115 int
116 116 fs_nosys_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
117 117 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
118 118 caller_context_t *ct)
119 119 {
120 120 return (ENOSYS);
121 121 }
122 122
123 123 /* ARGSUSED */
124 124 int
125 125 fs_nosys_addmap(struct vnode *vp, offset_t off, struct as *as, caddr_t addr,
126 126 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
127 127 caller_context_t *ct)
128 128 {
129 129 return (ENOSYS);
130 130 }
131 131
132 132 /* ARGSUSED */
133 133 int
134 134 fs_nosys_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
135 135 struct pollhead **phpp, caller_context_t *ct)
136 136 {
137 137 return (ENOSYS);
138 138 }
139 139
140 140
141 141 /*
142 142 * The file system has nothing to sync to disk. However, the
143 143 * VFS_SYNC operation must not fail.
144 144 */
145 145 /* ARGSUSED */
146 146 int
147 147 fs_sync(struct vfs *vfspp, short flag, cred_t *cr)
148 148 {
149 149 return (0);
150 150 }
151 151
152 152 /*
153 153 * Does nothing but VOP_FSYNC must not fail.
154 154 */
155 155 /* ARGSUSED */
156 156 int
157 157 fs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
158 158 {
159 159 return (0);
160 160 }
161 161
162 162 /*
163 163 * Does nothing but VOP_PUTPAGE must not fail.
164 164 */
165 165 /* ARGSUSED */
166 166 int
167 167 fs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
168 168 caller_context_t *ctp)
169 169 {
170 170 return (0);
171 171 }
172 172
173 173 /*
174 174 * Does nothing but VOP_IOCTL must not fail.
175 175 */
176 176 /* ARGSUSED */
177 177 int
178 178 fs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred,
179 179 int *rvalp)
180 180 {
181 181 return (0);
182 182 }
183 183
184 184 /*
185 185 * Read/write lock/unlock. Does nothing.
186 186 */
187 187 /* ARGSUSED */
188 188 int
189 189 fs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
190 190 {
191 191 return (-1);
192 192 }
193 193
194 194 /* ARGSUSED */
195 195 void
196 196 fs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
197 197 {
198 198 }
199 199
200 200 /*
201 201 * Compare two vnodes.
202 202 */
203 203 /*ARGSUSED2*/
204 204 int
205 205 fs_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
206 206 {
207 207 return (vp1 == vp2);
208 208 }
209 209
210 210 /*
211 211 * No-op seek operation.
212 212 */
213 213 /* ARGSUSED */
214 214 int
215 215 fs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
216 216 {
217 217 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
218 218 }
219 219
220 220 /*
221 221 * File and record locking.
222 222 */
223 223 /* ARGSUSED */
224 224 int
225 225 fs_frlock(vnode_t *vp, int cmd, struct flock64 *bfp, int flag, offset_t offset,
226 226 flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct)
227 227 {
228 228 int frcmd;
229 229 int nlmid;
230 230 int error = 0;
231 231 boolean_t skip_lock = B_FALSE;
232 232 flk_callback_t serialize_callback;
233 233 int serialize = 0;
234 234 v_mode_t mode;
235 235
236 236 switch (cmd) {
237 237
238 238 case F_GETLK:
239 239 case F_O_GETLK:
240 240 if (flag & F_REMOTELOCK) {
241 241 frcmd = RCMDLCK;
242 242 } else if (flag & F_PXFSLOCK) {
243 243 frcmd = PCMDLCK;
244 244 } else {
245 245 frcmd = 0;
246 246 bfp->l_pid = ttoproc(curthread)->p_pid;
247 247 bfp->l_sysid = 0;
248 248 }
249 249 break;
250 250
251 251 case F_OFD_GETLK:
252 252 /*
253 253 * TBD we do not support remote OFD locks at this time.
254 254 */
255 255 if (flag & (F_REMOTELOCK | F_PXFSLOCK)) {
256 256 error = EINVAL;
257 257 goto done;
258 258 }
259 259 skip_lock = B_TRUE;
260 260 break;
261 261
262 262 case F_SETLK_NBMAND:
263 263 /*
264 264 * Are NBMAND locks allowed on this file?
265 265 */
266 266 if (!vp->v_vfsp ||
267 267 !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
268 268 error = EINVAL;
269 269 goto done;
270 270 }
271 271 if (vp->v_type != VREG) {
272 272 error = EINVAL;
273 273 goto done;
274 274 }
275 275 /*FALLTHROUGH*/
276 276
277 277 case F_SETLK:
278 278 if (flag & F_REMOTELOCK) {
279 279 frcmd = SETFLCK|RCMDLCK;
280 280 } else if (flag & F_PXFSLOCK) {
281 281 frcmd = SETFLCK|PCMDLCK;
282 282 } else {
283 283 frcmd = SETFLCK;
284 284 bfp->l_pid = ttoproc(curthread)->p_pid;
285 285 bfp->l_sysid = 0;
286 286 }
287 287 if (cmd == F_SETLK_NBMAND &&
288 288 (bfp->l_type == F_RDLCK || bfp->l_type == F_WRLCK)) {
289 289 frcmd |= NBMLCK;
290 290 }
291 291
292 292 if (nbl_need_check(vp)) {
293 293 nbl_start_crit(vp, RW_WRITER);
294 294 serialize = 1;
295 295 if (frcmd & NBMLCK) {
296 296 mode = (bfp->l_type == F_RDLCK) ?
297 297 V_READ : V_RDANDWR;
298 298 if (vn_is_mapped(vp, mode)) {
299 299 error = EAGAIN;
300 300 goto done;
301 301 }
302 302 }
303 303 }
304 304 break;
305 305
306 306 case F_SETLKW:
307 307 if (flag & F_REMOTELOCK) {
308 308 frcmd = SETFLCK|SLPFLCK|RCMDLCK;
309 309 } else if (flag & F_PXFSLOCK) {
310 310 frcmd = SETFLCK|SLPFLCK|PCMDLCK;
311 311 } else {
312 312 frcmd = SETFLCK|SLPFLCK;
313 313 bfp->l_pid = ttoproc(curthread)->p_pid;
314 314 bfp->l_sysid = 0;
315 315 }
316 316
317 317 if (nbl_need_check(vp)) {
318 318 nbl_start_crit(vp, RW_WRITER);
319 319 serialize = 1;
320 320 }
321 321 break;
322 322
323 323 case F_OFD_SETLK:
324 324 case F_OFD_SETLKW:
325 325 case F_FLOCK:
326 326 case F_FLOCKW:
327 327 /*
328 328 * TBD we do not support remote OFD locks at this time.
329 329 */
330 330 if (flag & (F_REMOTELOCK | F_PXFSLOCK)) {
331 331 error = EINVAL;
332 332 goto done;
333 333 }
334 334 skip_lock = B_TRUE;
335 335 break;
336 336
337 337 case F_HASREMOTELOCKS:
338 338 nlmid = GETNLMID(bfp->l_sysid);
339 339 if (nlmid != 0) { /* booted as a cluster */
340 340 l_has_rmt(bfp) =
341 341 cl_flk_has_remote_locks_for_nlmid(vp, nlmid);
342 342 } else { /* not booted as a cluster */
343 343 l_has_rmt(bfp) = flk_has_remote_locks(vp);
344 344 }
345 345
346 346 goto done;
347 347
348 348 default:
349 349 error = EINVAL;
350 350 goto done;
351 351 }
352 352
353 353 /*
354 354 * If this is a blocking lock request and we're serializing lock
355 355 * requests, modify the callback list to leave the critical region
356 356 * while we're waiting for the lock.
357 357 */
358 358
359 359 if (serialize && (frcmd & SLPFLCK) != 0) {
360 360 flk_add_callback(&serialize_callback,
361 361 frlock_serialize_blocked, vp, flk_cbp);
362 362 flk_cbp = &serialize_callback;
363 363 }
364 364
365 365 if (!skip_lock)
366 366 error = reclock(vp, bfp, frcmd, flag, offset, flk_cbp);
367 367
368 368 if (serialize && (frcmd & SLPFLCK) != 0)
369 369 flk_del_callback(&serialize_callback);
370 370
371 371 done:
372 372 if (serialize)
373 373 nbl_end_crit(vp);
374 374
375 375 return (error);
376 376 }
377 377
378 378 /*
379 379 * Callback when a lock request blocks and we are serializing requests. If
380 380 * before sleeping, leave the critical region. If after wakeup, reenter
381 381 * the critical region.
382 382 */
383 383
384 384 static callb_cpr_t *
385 385 frlock_serialize_blocked(flk_cb_when_t when, void *infop)
386 386 {
387 387 vnode_t *vp = (vnode_t *)infop;
388 388
389 389 if (when == FLK_BEFORE_SLEEP)
390 390 nbl_end_crit(vp);
391 391 else {
392 392 nbl_start_crit(vp, RW_WRITER);
393 393 }
394 394
395 395 return (NULL);
396 396 }
397 397
398 398 /*
399 399 * Allow any flags.
400 400 */
401 401 /* ARGSUSED */
402 402 int
403 403 fs_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr, caller_context_t *ct)
404 404 {
405 405 return (0);
406 406 }
407 407
408 408 /*
|
↓ open down ↓ |
370 lines elided |
↑ open up ↑ |
409 409 * Return the answer requested to poll() for non-device files.
410 410   * Only POLLIN, POLLRDNORM, POLLRDBAND, POLLOUT, and POLLWRBAND
411 411 */
412 412 struct pollhead fs_pollhd;
413 413
414 414 /* ARGSUSED */
415 415 int
416 416 fs_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
417 417 struct pollhead **phpp, caller_context_t *ct)
418 418 {
419 + /*
420 + * Reject all attempts for edge-triggered polling. These should only
421 + * occur when regular files are added to a /dev/poll handle which is in
422 + * epoll mode. The Linux epoll does not allow epoll-ing on regular
423 + * files at all, so rejecting EPOLLET requests is congruent with those
424 + * expectations.
425 + */
426 + if (events & POLLET) {
427 + return (EPERM);
428 + }
429 +
419 430 *reventsp = 0;
420 431 if (events & POLLIN)
421 432 *reventsp |= POLLIN;
422 433 if (events & POLLRDNORM)
423 434 *reventsp |= POLLRDNORM;
424 435 if (events & POLLRDBAND)
425 436 *reventsp |= POLLRDBAND;
426 437 if (events & POLLOUT)
427 438 *reventsp |= POLLOUT;
428 439 if (events & POLLWRBAND)
429 440 *reventsp |= POLLWRBAND;
430 - *phpp = !anyyet && !*reventsp ? &fs_pollhd : (struct pollhead *)NULL;
441 + /*
442 + * Emitting a pollhead without the intention of issuing pollwakeup()
443 + * calls against it is a recipe for trouble. It's only acceptable in
444 + * this case since the above logic matches practically all useful
445 + * events.
446 + */
447 + if (*reventsp == 0 && !anyyet) {
448 + *phpp = &fs_pollhd;
449 + }
431 450 return (0);
432 451 }
433 452
434 453 /*
435 454 * POSIX pathconf() support.
436 455 */
437 456 /* ARGSUSED */
438 457 int
439 458 fs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
440 459 caller_context_t *ct)
441 460 {
442 461 ulong_t val;
443 462 int error = 0;
444 463 struct statvfs64 vfsbuf;
445 464
446 465 switch (cmd) {
447 466
448 467 case _PC_LINK_MAX:
449 468 val = MAXLINK;
450 469 break;
451 470
452 471 case _PC_MAX_CANON:
453 472 val = MAX_CANON;
454 473 break;
455 474
456 475 case _PC_MAX_INPUT:
457 476 val = MAX_INPUT;
458 477 break;
459 478
460 479 case _PC_NAME_MAX:
461 480 bzero(&vfsbuf, sizeof (vfsbuf));
462 481 if (error = VFS_STATVFS(vp->v_vfsp, &vfsbuf))
463 482 break;
464 483 val = vfsbuf.f_namemax;
465 484 break;
466 485
467 486 case _PC_PATH_MAX:
468 487 case _PC_SYMLINK_MAX:
469 488 val = MAXPATHLEN;
470 489 break;
471 490
472 491 case _PC_PIPE_BUF:
473 492 val = PIPE_BUF;
474 493 break;
475 494
476 495 case _PC_NO_TRUNC:
477 496 if (vp->v_vfsp->vfs_flag & VFS_NOTRUNC)
478 497 val = 1; /* NOTRUNC is enabled for vp */
479 498 else
480 499 val = (ulong_t)-1;
481 500 break;
482 501
483 502 case _PC_VDISABLE:
484 503 val = _POSIX_VDISABLE;
485 504 break;
486 505
487 506 case _PC_CHOWN_RESTRICTED:
488 507 if (rstchown)
489 508 val = rstchown; /* chown restricted enabled */
490 509 else
491 510 val = (ulong_t)-1;
492 511 break;
493 512
494 513 case _PC_FILESIZEBITS:
495 514
496 515 /*
497 516 * If ever we come here it means that underlying file system
498 517 * does not recognise the command and therefore this
499 518 * configurable limit cannot be determined. We return -1
500 519 * and don't change errno.
501 520 */
502 521
503 522 val = (ulong_t)-1; /* large file support */
504 523 break;
505 524
506 525 case _PC_ACL_ENABLED:
507 526 val = 0;
508 527 break;
509 528
510 529 case _PC_CASE_BEHAVIOR:
511 530 val = _CASE_SENSITIVE;
512 531 if (vfs_has_feature(vp->v_vfsp, VFSFT_CASEINSENSITIVE) == 1)
513 532 val |= _CASE_INSENSITIVE;
514 533 if (vfs_has_feature(vp->v_vfsp, VFSFT_NOCASESENSITIVE) == 1)
515 534 val &= ~_CASE_SENSITIVE;
516 535 break;
517 536
518 537 case _PC_SATTR_ENABLED:
519 538 case _PC_SATTR_EXISTS:
520 539 val = 0;
521 540 break;
522 541
523 542 case _PC_ACCESS_FILTERING:
524 543 val = 0;
525 544 break;
526 545
527 546 default:
528 547 error = EINVAL;
529 548 break;
530 549 }
531 550
532 551 if (error == 0)
533 552 *valp = val;
534 553 return (error);
535 554 }
536 555
537 556 /*
538 557 * Dispose of a page.
539 558 */
540 559 /* ARGSUSED */
541 560 void
542 561 fs_dispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr,
543 562 caller_context_t *ct)
544 563 {
545 564
546 565 ASSERT(fl == B_FREE || fl == B_INVAL);
547 566
548 567 if (fl == B_FREE)
549 568 page_free(pp, dn);
550 569 else
551 570 page_destroy(pp, dn);
552 571 }
553 572
554 573 /* ARGSUSED */
555 574 void
556 575 fs_nodispose(struct vnode *vp, page_t *pp, int fl, int dn, struct cred *cr,
557 576 caller_context_t *ct)
558 577 {
559 578 cmn_err(CE_PANIC, "fs_nodispose invoked");
560 579 }
561 580
562 581 /*
563 582 * fabricate acls for file systems that do not support acls.
564 583 */
565 584 /* ARGSUSED */
566 585 int
567 586 fs_fab_acl(vnode_t *vp, vsecattr_t *vsecattr, int flag, cred_t *cr,
568 587 caller_context_t *ct)
569 588 {
570 589 aclent_t *aclentp;
571 590 struct vattr vattr;
572 591 int error;
573 592 size_t aclsize;
574 593
575 594 vsecattr->vsa_aclcnt = 0;
576 595 vsecattr->vsa_aclentsz = 0;
577 596 vsecattr->vsa_aclentp = NULL;
578 597 vsecattr->vsa_dfaclcnt = 0; /* Default ACLs are not fabricated */
579 598 vsecattr->vsa_dfaclentp = NULL;
580 599
581 600 vattr.va_mask = AT_MODE | AT_UID | AT_GID;
582 601 if (error = VOP_GETATTR(vp, &vattr, 0, cr, ct))
583 602 return (error);
584 603
585 604 if (vsecattr->vsa_mask & (VSA_ACLCNT | VSA_ACL)) {
586 605 aclsize = 4 * sizeof (aclent_t);
587 606 vsecattr->vsa_aclcnt = 4; /* USER, GROUP, OTHER, and CLASS */
588 607 vsecattr->vsa_aclentp = kmem_zalloc(aclsize, KM_SLEEP);
589 608 aclentp = vsecattr->vsa_aclentp;
590 609
591 610 aclentp->a_type = USER_OBJ; /* Owner */
592 611 aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0700)) >> 6;
593 612 aclentp->a_id = vattr.va_uid; /* Really undefined */
594 613 aclentp++;
595 614
596 615 aclentp->a_type = GROUP_OBJ; /* Group */
597 616 aclentp->a_perm = ((ushort_t)(vattr.va_mode & 0070)) >> 3;
598 617 aclentp->a_id = vattr.va_gid; /* Really undefined */
599 618 aclentp++;
600 619
601 620 aclentp->a_type = OTHER_OBJ; /* Other */
602 621 aclentp->a_perm = vattr.va_mode & 0007;
603 622 aclentp->a_id = (gid_t)-1; /* Really undefined */
604 623 aclentp++;
605 624
606 625 aclentp->a_type = CLASS_OBJ; /* Class */
607 626 aclentp->a_perm = (ushort_t)(0007);
608 627 aclentp->a_id = (gid_t)-1; /* Really undefined */
609 628 } else if (vsecattr->vsa_mask & (VSA_ACECNT | VSA_ACE)) {
610 629 VERIFY(0 == acl_trivial_create(vattr.va_mode,
611 630 (vp->v_type == VDIR), (ace_t **)&vsecattr->vsa_aclentp,
612 631 &vsecattr->vsa_aclcnt));
613 632 vsecattr->vsa_aclentsz = vsecattr->vsa_aclcnt * sizeof (ace_t);
614 633 }
615 634
616 635 return (error);
617 636 }
618 637
619 638 /*
620 639 * Common code for implementing DOS share reservations
621 640 */
622 641 /* ARGSUSED4 */
623 642 int
624 643 fs_shrlock(struct vnode *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
625 644 caller_context_t *ct)
626 645 {
627 646 int error;
628 647
629 648 /*
630 649 * Make sure that the file was opened with permissions appropriate
631 650 * for the request, and make sure the caller isn't trying to sneak
632 651 * in an NBMAND request.
633 652 */
634 653 if (cmd == F_SHARE) {
635 654 if (((shr->s_access & F_RDACC) && (flag & FREAD) == 0) ||
636 655 ((shr->s_access & F_WRACC) && (flag & FWRITE) == 0))
637 656 return (EBADF);
638 657 if (shr->s_access & (F_RMACC | F_MDACC))
639 658 return (EINVAL);
640 659 if (shr->s_deny & (F_MANDDNY | F_RMDNY))
641 660 return (EINVAL);
642 661 }
643 662 if (cmd == F_SHARE_NBMAND) {
644 663 /* make sure nbmand is allowed on the file */
645 664 if (!vp->v_vfsp ||
646 665 !(vp->v_vfsp->vfs_flag & VFS_NBMAND)) {
647 666 return (EINVAL);
648 667 }
649 668 if (vp->v_type != VREG) {
650 669 return (EINVAL);
651 670 }
652 671 }
653 672
654 673 nbl_start_crit(vp, RW_WRITER);
655 674
656 675 switch (cmd) {
657 676
658 677 case F_SHARE_NBMAND:
659 678 shr->s_deny |= F_MANDDNY;
660 679 /*FALLTHROUGH*/
661 680 case F_SHARE:
662 681 error = add_share(vp, shr);
663 682 break;
664 683
665 684 case F_UNSHARE:
666 685 error = del_share(vp, shr);
667 686 break;
668 687
669 688 case F_HASREMOTELOCKS:
670 689 /*
671 690 * We are overloading this command to refer to remote
672 691 * shares as well as remote locks, despite its name.
673 692 */
674 693 shr->s_access = shr_has_remote_shares(vp, shr->s_sysid);
675 694 error = 0;
676 695 break;
677 696
678 697 default:
679 698 error = EINVAL;
680 699 break;
681 700 }
682 701
683 702 nbl_end_crit(vp);
684 703 return (error);
685 704 }
686 705
687 706 /*ARGSUSED1*/
688 707 int
689 708 fs_vnevent_nosupport(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
690 709 caller_context_t *ct)
691 710 {
692 711 ASSERT(vp != NULL);
693 712 return (ENOTSUP);
694 713 }
695 714
696 715 /*ARGSUSED1*/
697 716 int
698 717 fs_vnevent_support(vnode_t *vp, vnevent_t e, vnode_t *dvp, char *fnm,
699 718 caller_context_t *ct)
700 719 {
701 720 ASSERT(vp != NULL);
702 721 return (0);
703 722 }
704 723
705 724 /*
706 725 * return 1 for non-trivial ACL.
707 726 *
708 727 * NB: It is not necessary for the caller to VOP_RWLOCK since
709 728 * we only issue VOP_GETSECATTR.
710 729 *
711 730 * Returns 0 == trivial
712 731 * 1 == NOT Trivial
713 732 * <0 could not determine.
714 733 */
715 734 int
716 735 fs_acl_nontrivial(vnode_t *vp, cred_t *cr)
717 736 {
718 737 ulong_t acl_styles;
719 738 ulong_t acl_flavor;
720 739 vsecattr_t vsecattr;
721 740 int error;
722 741 int isnontrivial;
723 742
724 743 /* determine the forms of ACLs maintained */
725 744 error = VOP_PATHCONF(vp, _PC_ACL_ENABLED, &acl_styles, cr, NULL);
726 745
727 746 /* clear bits we don't understand and establish default acl_style */
728 747 acl_styles &= (_ACL_ACLENT_ENABLED | _ACL_ACE_ENABLED);
729 748 if (error || (acl_styles == 0))
730 749 acl_styles = _ACL_ACLENT_ENABLED;
731 750
732 751 vsecattr.vsa_aclentp = NULL;
733 752 vsecattr.vsa_dfaclentp = NULL;
734 753 vsecattr.vsa_aclcnt = 0;
735 754 vsecattr.vsa_dfaclcnt = 0;
736 755
737 756 while (acl_styles) {
738 757 /* select one of the styles as current flavor */
739 758 acl_flavor = 0;
740 759 if (acl_styles & _ACL_ACLENT_ENABLED) {
741 760 acl_flavor = _ACL_ACLENT_ENABLED;
742 761 vsecattr.vsa_mask = VSA_ACLCNT | VSA_DFACLCNT;
743 762 } else if (acl_styles & _ACL_ACE_ENABLED) {
744 763 acl_flavor = _ACL_ACE_ENABLED;
745 764 vsecattr.vsa_mask = VSA_ACECNT | VSA_ACE;
746 765 }
747 766
748 767 ASSERT(vsecattr.vsa_mask && acl_flavor);
749 768 error = VOP_GETSECATTR(vp, &vsecattr, 0, cr, NULL);
750 769 if (error == 0)
751 770 break;
752 771
753 772 /* that flavor failed */
754 773 acl_styles &= ~acl_flavor;
755 774 }
756 775
757 776 /* if all styles fail then assume trivial */
758 777 if (acl_styles == 0)
759 778 return (0);
760 779
761 780 /* process the flavor that worked */
762 781 isnontrivial = 0;
763 782 if (acl_flavor & _ACL_ACLENT_ENABLED) {
764 783 if (vsecattr.vsa_aclcnt > MIN_ACL_ENTRIES)
765 784 isnontrivial = 1;
766 785 if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
767 786 kmem_free(vsecattr.vsa_aclentp,
768 787 vsecattr.vsa_aclcnt * sizeof (aclent_t));
769 788 if (vsecattr.vsa_dfaclcnt && vsecattr.vsa_dfaclentp != NULL)
770 789 kmem_free(vsecattr.vsa_dfaclentp,
771 790 vsecattr.vsa_dfaclcnt * sizeof (aclent_t));
772 791 }
773 792 if (acl_flavor & _ACL_ACE_ENABLED) {
774 793 isnontrivial = ace_trivial(vsecattr.vsa_aclentp,
775 794 vsecattr.vsa_aclcnt);
776 795
777 796 if (vsecattr.vsa_aclcnt && vsecattr.vsa_aclentp != NULL)
778 797 kmem_free(vsecattr.vsa_aclentp,
779 798 vsecattr.vsa_aclcnt * sizeof (ace_t));
780 799 /* ACE has no vsecattr.vsa_dfaclcnt */
781 800 }
782 801 return (isnontrivial);
783 802 }
784 803
785 804 /*
786 805 * Check whether we need a retry to recover from STALE error.
787 806 */
788 807 int
789 808 fs_need_estale_retry(int retry_count)
790 809 {
791 810 if (retry_count < fs_estale_retry)
792 811 return (1);
793 812 else
794 813 return (0);
795 814 }
796 815
797 816
798 817 static int (*fs_av_scan)(vnode_t *, cred_t *, int) = NULL;
799 818
800 819 /*
801 820 * Routine for anti-virus scanner to call to register its scanning routine.
802 821 */
803 822 void
804 823 fs_vscan_register(int (*av_scan)(vnode_t *, cred_t *, int))
805 824 {
806 825 fs_av_scan = av_scan;
807 826 }
808 827
809 828 /*
810 829 * Routine for file systems to call to initiate anti-virus scanning.
811 830 * Scanning will only be done on REGular files (currently).
812 831 */
813 832 int
814 833 fs_vscan(vnode_t *vp, cred_t *cr, int async)
815 834 {
816 835 int ret = 0;
817 836
818 837 if (fs_av_scan && vp->v_type == VREG)
819 838 ret = (*fs_av_scan)(vp, cr, async);
820 839
821 840 return (ret);
822 841 }
823 842
824 843 /*
825 844 * support functions for reparse point
826 845 */
827 846 /*
828 847 * reparse_vnode_parse
829 848 *
830 849 * Read the symlink data of a reparse point specified by the vnode
831 850 * and return the reparse data as name-value pair in the nvlist.
832 851 */
833 852 int
834 853 reparse_vnode_parse(vnode_t *vp, nvlist_t *nvl)
835 854 {
836 855 int err;
837 856 char *lkdata;
838 857 struct uio uio;
839 858 struct iovec iov;
840 859
841 860 if (vp == NULL || nvl == NULL)
842 861 return (EINVAL);
843 862
844 863 lkdata = kmem_alloc(MAXREPARSELEN, KM_SLEEP);
845 864
846 865 /*
847 866 * Set up io vector to read sym link data
848 867 */
849 868 iov.iov_base = lkdata;
850 869 iov.iov_len = MAXREPARSELEN;
851 870 uio.uio_iov = &iov;
852 871 uio.uio_iovcnt = 1;
853 872 uio.uio_segflg = UIO_SYSSPACE;
854 873 uio.uio_extflg = UIO_COPY_CACHED;
855 874 uio.uio_loffset = (offset_t)0;
856 875 uio.uio_resid = MAXREPARSELEN;
857 876
858 877 if ((err = VOP_READLINK(vp, &uio, kcred, NULL)) == 0) {
859 878 *(lkdata + MAXREPARSELEN - uio.uio_resid) = '\0';
860 879 err = reparse_parse(lkdata, nvl);
861 880 }
862 881 kmem_free(lkdata, MAXREPARSELEN); /* done with lkdata */
863 882
864 883 return (err);
865 884 }
866 885
867 886 void
868 887 reparse_point_init()
869 888 {
870 889 mutex_init(&reparsed_door_lock, NULL, MUTEX_DEFAULT, NULL);
871 890 }
872 891
873 892 static door_handle_t
874 893 reparse_door_get_handle()
875 894 {
876 895 door_handle_t dh;
877 896
878 897 mutex_enter(&reparsed_door_lock);
879 898 if ((dh = reparsed_door) == NULL) {
880 899 if (door_ki_open(REPARSED_DOOR, &reparsed_door) != 0) {
881 900 reparsed_door = NULL;
882 901 dh = NULL;
883 902 } else
884 903 dh = reparsed_door;
885 904 }
886 905 mutex_exit(&reparsed_door_lock);
887 906 return (dh);
888 907 }
889 908
890 909 static void
891 910 reparse_door_reset_handle()
892 911 {
893 912 mutex_enter(&reparsed_door_lock);
894 913 reparsed_door = NULL;
895 914 mutex_exit(&reparsed_door_lock);
896 915 }
897 916
898 917 /*
899 918 * reparse_kderef
900 919 *
901 920 * Accepts the service-specific item from the reparse point and returns
902 921 * the service-specific data requested. The caller specifies the size of
903 922   * the buffer provided via *bufsize; the routine will fail with EOVERFLOW
904 923   * if the results will not fit in the buffer, in which case, *bufsize will
905 924 * contain the number of bytes needed to hold the results.
906 925 *
907 926   * If successful, return 0 and update *bufsize with the length of the
908 927   * actual result; otherwise, return an error code.
909 928 */
910 929 int
911 930 reparse_kderef(const char *svc_type, const char *svc_data, char *buf,
912 931 size_t *bufsize)
913 932 {
914 933 int err, retries, need_free, retried_doorhd;
915 934 size_t dlen, res_len;
916 935 char *darg;
917 936 door_arg_t door_args;
918 937 reparsed_door_res_t *resp;
919 938 door_handle_t rp_door;
920 939
921 940 if (svc_type == NULL || svc_data == NULL || buf == NULL ||
922 941 bufsize == NULL)
923 942 return (EINVAL);
924 943
925 944 /* get reparsed's door handle */
926 945 if ((rp_door = reparse_door_get_handle()) == NULL)
927 946 return (EBADF);
928 947
929 948 /* setup buffer for door_call args and results */
930 949 dlen = strlen(svc_type) + strlen(svc_data) + 2;
931 950 if (*bufsize < dlen) {
932 951 darg = kmem_alloc(dlen, KM_SLEEP);
933 952 need_free = 1;
934 953 } else {
935 954 darg = buf; /* use same buffer for door's args & results */
936 955 need_free = 0;
937 956 }
938 957
939 958 /* build argument string of door call */
940 959 (void) snprintf(darg, dlen, "%s:%s", svc_type, svc_data);
941 960
942 961 /* setup args for door call */
943 962 door_args.data_ptr = darg;
944 963 door_args.data_size = dlen;
945 964 door_args.desc_ptr = NULL;
946 965 door_args.desc_num = 0;
947 966 door_args.rbuf = buf;
948 967 door_args.rsize = *bufsize;
949 968
950 969 /* do the door_call */
951 970 retried_doorhd = 0;
952 971 retries = 0;
953 972 door_ki_hold(rp_door);
954 973 while ((err = door_ki_upcall_limited(rp_door, &door_args,
955 974 NULL, SIZE_MAX, 0)) != 0) {
956 975 if (err == EAGAIN || err == EINTR) {
957 976 if (++retries < REPARSED_DOORCALL_MAX_RETRY) {
958 977 delay(SEC_TO_TICK(1));
959 978 continue;
960 979 }
961 980 } else if (err == EBADF) {
962 981 /* door server goes away... */
963 982 reparse_door_reset_handle();
964 983
965 984 if (retried_doorhd == 0) {
966 985 door_ki_rele(rp_door);
967 986 retried_doorhd++;
968 987 rp_door = reparse_door_get_handle();
969 988 if (rp_door != NULL) {
970 989 door_ki_hold(rp_door);
971 990 continue;
972 991 }
973 992 }
974 993 }
975 994 break;
976 995 }
977 996
978 997 if (rp_door)
979 998 door_ki_rele(rp_door);
980 999
981 1000 if (need_free)
982 1001 kmem_free(darg, dlen); /* done with args buffer */
983 1002
984 1003 if (err != 0)
985 1004 return (err);
986 1005
987 1006 resp = (reparsed_door_res_t *)door_args.rbuf;
988 1007 if ((err = resp->res_status) == 0) {
989 1008 /*
990 1009 * have to save the length of the results before the
991 1010  * bcopy below since it can be an overlapping copy that
992 1011 * overwrites the reparsed_door_res_t structure at
993 1012 * the beginning of the buffer.
994 1013 */
995 1014 res_len = (size_t)resp->res_len;
996 1015
997 1016 /* deref call is ok */
998 1017 if (res_len > *bufsize)
999 1018 err = EOVERFLOW;
1000 1019 else
1001 1020 bcopy(resp->res_data, buf, res_len);
1002 1021 *bufsize = res_len;
1003 1022 }
1004 1023 if (door_args.rbuf != buf)
1005 1024 kmem_free(door_args.rbuf, door_args.rsize);
1006 1025
1007 1026 return (err);
1008 1027 }
|
↓ open down ↓ |
568 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX