1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2013, 2016 Joyent, Inc. All rights reserved.
24 */
25
26 /*
27 * utility routines for the /dev fs
28 */
29
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/t_lock.h>
33 #include <sys/systm.h>
34 #include <sys/sysmacros.h>
35 #include <sys/user.h>
36 #include <sys/time.h>
37 #include <sys/vfs.h>
38 #include <sys/vnode.h>
39 #include <sys/file.h>
40 #include <sys/fcntl.h>
41 #include <sys/flock.h>
42 #include <sys/kmem.h>
43 #include <sys/uio.h>
44 #include <sys/errno.h>
45 #include <sys/stat.h>
46 #include <sys/cred.h>
47 #include <sys/dirent.h>
48 #include <sys/pathname.h>
49 #include <sys/cmn_err.h>
50 #include <sys/debug.h>
51 #include <sys/mode.h>
52 #include <sys/policy.h>
53 #include <fs/fs_subr.h>
54 #include <sys/mount.h>
55 #include <sys/fs/snode.h>
56 #include <sys/fs/dv_node.h>
57 #include <sys/fs/sdev_impl.h>
58 #include <sys/sunndi.h>
59 #include <sys/sunmdi.h>
60 #include <sys/conf.h>
61 #include <sys/proc.h>
62 #include <sys/user.h>
63 #include <sys/modctl.h>
64
#ifdef DEBUG
/*
 * Debug knobs, only present in DEBUG builds.
 *
 * sdev_debug: debug message mask -- presumably consulted by the sdcmn_err*()
 * macros; confirm against sdev_impl.h.
 *
 * sdev_debug_cache_flags: passed as the cache flags argument of
 * kmem_cache_create() by sdev_node_cache_init().
 */
int sdev_debug = 0x00000001;
int sdev_debug_cache_flags = 0;
#endif
69
70 /*
71 * globals
72 */
73 /* prototype memory vattrs */
74 vattr_t sdev_vattr_dir = {
75 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */
76 VDIR, /* va_type */
77 SDEV_DIRMODE_DEFAULT, /* va_mode */
78 SDEV_UID_DEFAULT, /* va_uid */
79 SDEV_GID_DEFAULT, /* va_gid */
80 0, /* va_fsid */
81 0, /* va_nodeid */
82 0, /* va_nlink */
83 0, /* va_size */
84 0, /* va_atime */
85 0, /* va_mtime */
86 0, /* va_ctime */
87 0, /* va_rdev */
88 0, /* va_blksize */
89 0, /* va_nblocks */
90 0 /* va_vcode */
91 };
92
93 vattr_t sdev_vattr_lnk = {
94 AT_TYPE|AT_MODE, /* va_mask */
95 VLNK, /* va_type */
96 SDEV_LNKMODE_DEFAULT, /* va_mode */
97 SDEV_UID_DEFAULT, /* va_uid */
98 SDEV_GID_DEFAULT, /* va_gid */
99 0, /* va_fsid */
100 0, /* va_nodeid */
101 0, /* va_nlink */
102 0, /* va_size */
103 0, /* va_atime */
104 0, /* va_mtime */
105 0, /* va_ctime */
106 0, /* va_rdev */
107 0, /* va_blksize */
108 0, /* va_nblocks */
109 0 /* va_vcode */
110 };
111
112 vattr_t sdev_vattr_blk = {
113 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */
114 VBLK, /* va_type */
115 S_IFBLK | SDEV_DEVMODE_DEFAULT, /* va_mode */
116 SDEV_UID_DEFAULT, /* va_uid */
117 SDEV_GID_DEFAULT, /* va_gid */
118 0, /* va_fsid */
119 0, /* va_nodeid */
120 0, /* va_nlink */
121 0, /* va_size */
122 0, /* va_atime */
123 0, /* va_mtime */
124 0, /* va_ctime */
125 0, /* va_rdev */
126 0, /* va_blksize */
127 0, /* va_nblocks */
128 0 /* va_vcode */
129 };
130
131 vattr_t sdev_vattr_chr = {
132 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */
133 VCHR, /* va_type */
134 S_IFCHR | SDEV_DEVMODE_DEFAULT, /* va_mode */
135 SDEV_UID_DEFAULT, /* va_uid */
136 SDEV_GID_DEFAULT, /* va_gid */
137 0, /* va_fsid */
138 0, /* va_nodeid */
139 0, /* va_nlink */
140 0, /* va_size */
141 0, /* va_atime */
142 0, /* va_mtime */
143 0, /* va_ctime */
144 0, /* va_rdev */
145 0, /* va_blksize */
146 0, /* va_nblocks */
147 0 /* va_vcode */
148 };
149
/*
 * kmem cache that all sdev_node structures are allocated from; created by
 * sdev_node_cache_init() and torn down by sdev_node_cache_fini().  NULL while
 * the cache does not exist.
 */
kmem_cache_t	*sdev_node_cache;	/* sdev_node cache */
int	devtype;		/* fstype */
152
/*
 * Free the profile nvlists hanging off a non-global sdev_node and wipe the
 * profile structure so that no stale pointers are left behind.
 *
 * Must only be called on a node that is not SDEV_GLOBAL.
 */
static void
sdev_prof_free(struct sdev_node *dv)
{
	ASSERT(!SDEV_IS_GLOBAL(dv));
	nvlist_free(dv->sdev_prof.dev_name);
	nvlist_free(dv->sdev_prof.dev_map);
	nvlist_free(dv->sdev_prof.dev_symlink);
	nvlist_free(dv->sdev_prof.dev_glob_incdir);
	nvlist_free(dv->sdev_prof.dev_glob_excdir);
	/* clear the pointers that were just freed */
	bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
}
164
165 /* sdev_node cache constructor */
166 /*ARGSUSED1*/
167 static int
168 i_sdev_node_ctor(void *buf, void *cfarg, int flag)
169 {
170 struct sdev_node *dv = (struct sdev_node *)buf;
171 struct vnode *vp;
172
173 bzero(buf, sizeof (struct sdev_node));
174 vp = dv->sdev_vnode = vn_alloc(flag);
175 if (vp == NULL) {
176 return (-1);
177 }
178 vp->v_data = dv;
179 rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
180 return (0);
181 }
182
183 /* sdev_node cache destructor */
184 /*ARGSUSED1*/
185 static void
186 i_sdev_node_dtor(void *buf, void *arg)
187 {
188 struct sdev_node *dv = (struct sdev_node *)buf;
189 struct vnode *vp = SDEVTOV(dv);
190
191 rw_destroy(&dv->sdev_contents);
192 vn_free(vp);
193 }
194
195 /* initialize sdev_node cache */
196 void
197 sdev_node_cache_init()
198 {
199 int flags = 0;
200
201 #ifdef DEBUG
202 flags = sdev_debug_cache_flags;
203 if (flags)
204 sdcmn_err(("cache debug flags 0x%x\n", flags));
205 #endif /* DEBUG */
206
207 ASSERT(sdev_node_cache == NULL);
208 sdev_node_cache = kmem_cache_create("sdev_node_cache",
209 sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
210 NULL, NULL, NULL, flags);
211 }
212
/*
 * Destroy the global sdev_node kmem cache created by sdev_node_cache_init().
 * The cache must exist on entry; the global pointer is reset to NULL so that
 * a later sdev_node_cache_init() passes its own assertion.
 */
void
sdev_node_cache_fini()
{
	ASSERT(sdev_node_cache != NULL);
	kmem_cache_destroy(sdev_node_cache);
	sdev_node_cache = NULL;
}
221
222 /*
223 * Compare two nodes lexographically to balance avl tree
224 */
225 static int
226 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2)
227 {
228 int rv;
229 if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0)
230 return (0);
231 return ((rv < 0) ? -1 : 1);
232 }
233
/*
 * Set the state of a sdev_node.
 *
 * The caller must hold dv->sdev_contents as a writer.
 */
void
sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
{
	ASSERT(dv);
	ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
	dv->sdev_state = state;
}
241
242 static void
243 sdev_attr_update(struct sdev_node *dv, vattr_t *vap)
244 {
245 timestruc_t now;
246 struct vattr *attrp;
247 uint_t mask;
248
249 ASSERT(dv->sdev_attr);
250 ASSERT(vap);
251
252 attrp = dv->sdev_attr;
253 mask = vap->va_mask;
254 if (mask & AT_TYPE)
255 attrp->va_type = vap->va_type;
256 if (mask & AT_MODE)
257 attrp->va_mode = vap->va_mode;
258 if (mask & AT_UID)
259 attrp->va_uid = vap->va_uid;
260 if (mask & AT_GID)
261 attrp->va_gid = vap->va_gid;
262 if (mask & AT_RDEV)
263 attrp->va_rdev = vap->va_rdev;
264
265 gethrestime(&now);
266 attrp->va_atime = (mask & AT_ATIME) ? vap->va_atime : now;
267 attrp->va_mtime = (mask & AT_MTIME) ? vap->va_mtime : now;
268 attrp->va_ctime = (mask & AT_CTIME) ? vap->va_ctime : now;
269 }
270
/*
 * Allocate the node's in-memory attribute structure (zero-filled) and seed it
 * from vap via sdev_attr_update().
 *
 * The node must not already have in-memory attributes, and vap must carry at
 * least a type and a mode.
 */
static void
sdev_attr_alloc(struct sdev_node *dv, vattr_t *vap)
{
	ASSERT(dv->sdev_attr == NULL);
	ASSERT(vap->va_mask & AT_TYPE);
	ASSERT(vap->va_mask & AT_MODE);

	dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
	sdev_attr_update(dv, vap);
}
281
/*
 * Allocate and initialize a sdev_node named nm under the parent ddv.
 *
 * The new node is left in SDEV_INIT state with a link count of 0; it is not
 * inserted into the parent's directory cache here.
 *
 * Arguments:
 *	- ddv: parent directory node; supplies the vfs, vnodeops, path prefix
 *	  and initial flags
 *	- nm: name of the new node
 *	- newdv: the new node is returned here (NULL on failure)
 *	- vap: optional attributes; when non-NULL the vnode type is taken from
 *	  it and in-memory attributes are allocated
 *
 * Returns 0 on success or ENAMETOOLONG if nm (including its terminating
 * '\0') does not fit in MAXNAMELEN bytes.
 */
int
sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
    vattr_t *vap)
{
	struct sdev_node *dv = NULL;
	struct vnode *vp;
	size_t nmlen, len;
	devname_handle_t *dhl;

	/* nmlen counts the terminating '\0' */
	nmlen = strlen(nm) + 1;
	if (nmlen > MAXNAMELEN) {
		sdcmn_err9(("sdev_nodeinit: node name %s"
		    " too long\n", nm));
		*newdv = NULL;
		return (ENAMETOOLONG);
	}

	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);

	dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
	bcopy(nm, dv->sdev_name, nmlen);
	dv->sdev_namelen = nmlen - 1;	/* '\0' not included */
	/* parent path + '/' + name + '\0' */
	len = strlen(ddv->sdev_path) + strlen(nm) + 2;
	dv->sdev_path = kmem_alloc(len, KM_SLEEP);
	(void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
	/* overwritten for VLNK nodes */
	dv->sdev_symlink = NULL;
	list_link_init(&dv->sdev_plist);

	/* the vnode was attached by the cache constructor; reset it for use */
	vp = SDEVTOV(dv);
	vn_reinit(vp);
	vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
	if (vap)
		vp->v_type = vap->va_type;

	/*
	 * initialized to the parent's vnodeops.
	 * may be overwritten for a VDIR
	 */
	vn_setops(vp, vn_getops(SDEVTOV(ddv)));
	vn_exists(vp);

	dv->sdev_dotdot = NULL;
	dv->sdev_attrvp = NULL;
	if (vap) {
		sdev_attr_alloc(dv, vap);
	} else {
		dv->sdev_attr = NULL;
	}

	dv->sdev_ino = sdev_mkino(dv);
	dv->sdev_nlink = 0;		/* updated on insert */
	dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
	dv->sdev_flags |= SDEV_BUILD;
	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
	if (SDEV_IS_GLOBAL(ddv)) {
		/* global instance: set up the devname handle */
		dv->sdev_flags |= SDEV_GLOBAL;
		dhl = &(dv->sdev_handle);
		dhl->dh_data = dv;
		dhl->dh_args = NULL;
		sdev_set_no_negcache(dv);
		dv->sdev_gdir_gen = 0;
	} else {
		/* non-global instance: start with an empty profile */
		dv->sdev_flags &= ~SDEV_GLOBAL;
		dv->sdev_origin = NULL;	/* set later */
		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
		dv->sdev_ldir_gen = 0;
		dv->sdev_devtree_gen = 0;
	}

	/* sdev_set_nodestate() requires the contents writer lock */
	rw_enter(&dv->sdev_contents, RW_WRITER);
	sdev_set_nodestate(dv, SDEV_INIT);
	rw_exit(&dv->sdev_contents);
	*newdv = dv;

	return (0);
}
361
/*
 * Transition a sdev_node into SDEV_READY state. If this fails, it is up to the
 * caller to transition the node to the SDEV_ZOMBIE state.
 *
 * Arguments:
 *	- dv: node to make ready; must not already be SDEV_READY
 *	- vap: attributes for the node; va_type and va_rdev are applied to the
 *	  vnode
 *	- avp: attribute (backing store) vnode, or NULL to use in-memory
 *	  attributes
 *	- args: symlink target string for a VLNK; for a non-global node it is
 *	  also recorded as sdev_origin
 *	- cred: credentials used if a shadow node has to be created
 *
 * Returns 0 on success, or the error from sdev_shadow_node().
 */
int
sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
    void *args, struct cred *cred)
{
	int error = 0;
	struct vnode *vp = SDEVTOV(dv);
	vtype_t type;

	ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);

	type = vap->va_type;
	vp->v_type = type;
	vp->v_rdev = vap->va_rdev;
	rw_enter(&dv->sdev_contents, RW_WRITER);
	if (type == VDIR) {
		/* a directory starts with two links */
		dv->sdev_nlink = 2;
		dv->sdev_flags &= ~SDEV_PERSIST;
		dv->sdev_flags &= ~SDEV_DYNAMIC;
		vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
		ASSERT(dv->sdev_dotdot);
		ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
		/* directories take their v_rdev from the parent directory */
		vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
		avl_create(&dv->sdev_entries,
		    (int (*)(const void *, const void *))sdev_compare_nodes,
		    sizeof (struct sdev_node),
		    offsetof(struct sdev_node, sdev_avllink));
	} else if (type == VLNK) {
		ASSERT(args);
		dv->sdev_nlink = 1;
		/* keep a private copy of the link target */
		dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
	} else {
		dv->sdev_nlink = 1;
	}
	sdev_plugin_nodeready(dv);

	if (!(SDEV_IS_GLOBAL(dv))) {
		/*
		 * Note that for a VLNK, args is the target string rather than
		 * a sdev_node; sdev_nodedestroy() does not release
		 * sdev_origin for VLNK nodes.
		 */
		dv->sdev_origin = (struct sdev_node *)args;
		dv->sdev_flags &= ~SDEV_PERSIST;
	}

	/*
	 * shadow node is created here OR
	 * if failed (indicated by dv->sdev_attrvp == NULL),
	 * created later in sdev_setattr
	 */
	if (avp) {
		dv->sdev_attrvp = avp;
	} else {
		if (dv->sdev_attr == NULL) {
			sdev_attr_alloc(dv, vap);
		} else {
			sdev_attr_update(dv, vap);
		}

		if ((dv->sdev_attrvp == NULL) && SDEV_IS_PERSIST(dv))
			error = sdev_shadow_node(dv, cred);
	}

	if (error == 0) {
		/* transition to READY state */
		sdev_set_nodestate(dv, SDEV_READY);
		sdev_nc_node_exists(dv);
	}
	rw_exit(&dv->sdev_contents);
	return (error);
}
432
/*
 * Build the VROOT sdev_node.
 *
 * Arguments:
 *	- vfsp: the vfs being mounted; its mount point (if set) becomes the
 *	  node name
 *	- devdev: device number stored in the root vnode's v_rdev
 *	- mvp: unused here
 *	- avp: attribute (backing store) vnode for the root; must be non-NULL
 *	- cred: unused here
 *
 * The returned node is in SDEV_READY state, is its own "..", and has a link
 * count of 2.  A root named "/dev" is flagged as the global, persistent
 * instance; any other name gets a non-global instance with an empty profile.
 */
/*ARGSUSED*/
struct sdev_node *
sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
    struct vnode *avp, struct cred *cred)
{
	struct sdev_node *dv;
	struct vnode *vp;
	char devdir[] = "/dev";

	ASSERT(sdev_node_cache != NULL);
	ASSERT(avp);
	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
	vp = SDEVTOV(dv);
	vn_reinit(vp);
	vp->v_flag |= VROOT;
	vp->v_vfsp = vfsp;
	vp->v_type = VDIR;
	vp->v_rdev = devdev;
	vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
	vn_exists(vp);

	if (vfsp->vfs_mntpt)
		dv->sdev_name = i_ddi_strdup(
		    (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
	else
		/* vfs_mountdev1 set mount point later */
		dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
	dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
	/* the path is always "/dev", whatever the mount point is */
	dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
	dv->sdev_ino = SDEV_ROOTINO;
	dv->sdev_nlink = 2;		/* name + . (no sdev_insert) */
	dv->sdev_dotdot = dv;		/* .. == self */
	dv->sdev_attrvp = avp;
	dv->sdev_attr = NULL;
	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
	if (strcmp(dv->sdev_name, "/dev") == 0) {
		/* the global /dev instance */
		dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
		bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
		dv->sdev_gdir_gen = 0;
	} else {
		/* a non-global instance mounted somewhere else */
		dv->sdev_flags = SDEV_BUILD;
		dv->sdev_flags &= ~SDEV_PERSIST;
		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
		dv->sdev_ldir_gen = 0;
		dv->sdev_devtree_gen = 0;
	}

	avl_create(&dv->sdev_entries,
	    (int (*)(const void *, const void *))sdev_compare_nodes,
	    sizeof (struct sdev_node),
	    offsetof(struct sdev_node, sdev_avllink));

	/* sdev_set_nodestate() requires the contents writer lock */
	rw_enter(&dv->sdev_contents, RW_WRITER);
	sdev_set_nodestate(dv, SDEV_READY);
	rw_exit(&dv->sdev_contents);
	sdev_nc_node_exists(dv);
	return (dv);
}
495
/*
 * Table of directories under /dev that get special treatment; the list is
 * terminated by an entry with a NULL name.
 *
 * NOTE(review): each entry appears to be { directory name, vnodeops
 * template, location of the vnodeops pointer, validation callback, flags } --
 * confirm against the definition of struct sdev_vop_table.
 */
struct sdev_vop_table vtab[] = {
	{ "pts", devpts_vnodeops_tbl, &devpts_vnodeops, devpts_validate,
	SDEV_DYNAMIC | SDEV_VTOR },

	{ "vt", devvt_vnodeops_tbl, &devvt_vnodeops, devvt_validate,
	SDEV_DYNAMIC | SDEV_VTOR },

	{ "zvol", devzvol_vnodeops_tbl, &devzvol_vnodeops,
	devzvol_validate, SDEV_ZONED | SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },

	/* no private vnodeops or validator; only the flag differs */
	{ "zcons", NULL, NULL, NULL, SDEV_NO_NCACHE },

	{ "net", devnet_vnodeops_tbl, &devnet_vnodeops, devnet_validate,
	SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },

	{ "ipnet", devipnet_vnodeops_tbl, &devipnet_vnodeops,
	devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE },

	/*
	 * SDEV_DYNAMIC: prevent calling out to devfsadm, since only the
	 * lofi driver controls child nodes.
	 *
	 * SDEV_PERSIST: ensure devfsadm knows to clean up any persisted
	 * stale nodes (e.g. from devfsadm -R).
	 *
	 * In addition, devfsadm knows not to attempt a rmdir: a zone
	 * may hold a reference, which would zombify the node,
	 * preventing a mkdir.
	 */

	{ "lofi", NULL, NULL, NULL,
	    SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
	{ "rlofi", NULL, NULL, NULL,
	    SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },

	/* terminator */
	{ NULL, NULL, NULL, NULL, 0}
};
533
534
535 /*
536 * Build the base root inode
537 */
538 ino_t
539 sdev_mkino(struct sdev_node *dv)
540 {
541 ino_t ino;
542
543 /*
544 * for now, follow the lead of tmpfs here
545 * need to someday understand the requirements here
546 */
547 ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
548 ino += SDEV_ROOTINO + 1;
549
550 return (ino);
551 }
552
553 int
554 sdev_getlink(struct vnode *linkvp, char **link)
555 {
556 int err;
557 char *buf;
558 struct uio uio = {0};
559 struct iovec iov = {0};
560
561 if (linkvp == NULL)
562 return (ENOENT);
563 ASSERT(linkvp->v_type == VLNK);
564
565 buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
566 iov.iov_base = buf;
567 iov.iov_len = MAXPATHLEN;
568 uio.uio_iov = &iov;
569 uio.uio_iovcnt = 1;
570 uio.uio_resid = MAXPATHLEN;
571 uio.uio_segflg = UIO_SYSSPACE;
572 uio.uio_llimit = MAXOFFSET_T;
573
574 err = VOP_READLINK(linkvp, &uio, kcred, NULL);
575 if (err) {
576 cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
577 kmem_free(buf, MAXPATHLEN);
578 return (ENOENT);
579 }
580
581 /* mission complete */
582 *link = i_ddi_strdup(buf, KM_SLEEP);
583 kmem_free(buf, MAXPATHLEN);
584 return (0);
585 }
586
587 /*
588 * A convenient wrapper to get the devfs node vnode for a device
589 * minor functionality: readlink() of a /dev symlink
590 * Place the link into dv->sdev_symlink
591 */
592 static int
593 sdev_follow_link(struct sdev_node *dv)
594 {
595 int err;
596 struct vnode *linkvp;
597 char *link = NULL;
598
599 linkvp = SDEVTOV(dv);
600 if (linkvp == NULL)
601 return (ENOENT);
602 ASSERT(linkvp->v_type == VLNK);
603 err = sdev_getlink(linkvp, &link);
604 if (err) {
605 dv->sdev_symlink = NULL;
606 return (ENOENT);
607 }
608
609 ASSERT(link != NULL);
610 dv->sdev_symlink = link;
611 return (0);
612 }
613
614 static int
615 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
616 {
617 vtype_t otype = SDEVTOV(dv)->v_type;
618
619 /*
620 * existing sdev_node has a different type.
621 */
622 if (otype != nvap->va_type) {
623 sdcmn_err9(("sdev_node_check: existing node "
624 " %s type %d does not match new node type %d\n",
625 dv->sdev_name, otype, nvap->va_type));
626 return (EEXIST);
627 }
628
629 /*
630 * For a symlink, the target should be the same.
631 */
632 if (otype == VLNK) {
633 ASSERT(nargs != NULL);
634 ASSERT(dv->sdev_symlink != NULL);
635 if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
636 sdcmn_err9(("sdev_node_check: existing node "
637 " %s has different symlink %s as new node "
638 " %s\n", dv->sdev_name, dv->sdev_symlink,
639 (char *)nargs));
640 return (EEXIST);
641 }
642 }
643
644 return (0);
645 }
646
/*
 * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
 *
 * arguments:
 *	- ddv (parent)
 *	- nm (child name)
 *	- newdv (in: an existing sdev_node for nm, or a pointer to NULL to
 *	  have one allocated; out: the sdev_node for nm, or NULL on error)
 *	- vap (vattr for the node to be created, va_type should be set.
 *	  the defaults should be used if unknown)
 *	- avp (attribute vnode)
 *	- args
 *		. tnm (for VLNK)
 *		. global sdev_node (for !SDEV_GLOBAL)
 *	- cred
 *	- state: SDEV_INIT, SDEV_READY
 *
 * only ddv, nm, newdv, vap, cred are required for sdev_mknode(SDEV_INIT)
 *
 * Returns 0 on success or an errno value: ENOENT if a node has to be
 * allocated under a ZOMBIE parent, otherwise whatever sdev_nodeinit(),
 * sdev_nodeready() or sdev_node_check() reported.
 *
 * NOTE: directory contents writers lock needs to be held before
 * calling this routine.
 */
int
sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
    struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
    sdev_node_state_t state)
{
	int error = 0;
	sdev_node_state_t node_state;
	struct sdev_node *dv = NULL;

	ASSERT(state != SDEV_ZOMBIE);
	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));

	if (*newdv) {
		/* the caller supplied an existing node */
		dv = *newdv;
	} else {
		/* allocate and initialize a sdev_node */
		if (ddv->sdev_state == SDEV_ZOMBIE) {
			sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
			    ddv->sdev_path));
			return (ENOENT);
		}

		error = sdev_nodeinit(ddv, nm, &dv, vap);
		if (error != 0) {
			sdcmn_err9(("sdev_mknode: error %d,"
			    " name %s can not be initialized\n",
			    error, nm));
			return (error);
		}
		ASSERT(dv);

		/* insert into the directory cache */
		sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
	}

	ASSERT(dv);
	node_state = dv->sdev_state;
	ASSERT(node_state != SDEV_ZOMBIE);

	/* nothing more to do when only SDEV_INIT was requested */
	if (state == SDEV_READY) {
		switch (node_state) {
		case SDEV_INIT:
			error = sdev_nodeready(dv, vap, avp, args, cred);
			if (error) {
				sdcmn_err9(("sdev_mknode: node %s can NOT"
				    " be transitioned into READY state, "
				    "error %d\n", nm, error));
			}
			break;
		case SDEV_READY:
			/*
			 * Do some sanity checking to make sure
			 * the existing sdev_node is what has been
			 * asked for.
			 */
			error = sdev_node_check(dv, vap, args);
			break;
		default:
			break;
		}
	}

	if (!error) {
		*newdv = dv;
		ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
	} else {
		/*
		 * On failure the node is removed from the directory cache,
		 * whether it was created here or passed in by the caller.
		 */
		sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE);
		/*
		 * We created this node, it wasn't passed into us. Therefore it
		 * is up to us to delete it.
		 */
		if (*newdv == NULL)
			SDEV_SIMPLE_RELE(dv);
		*newdv = NULL;
	}

	return (error);
}
746
747 /*
748 * convenient wrapper to change vp's ATIME, CTIME and MTIME
749 */
750 void
751 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
752 {
753 struct vattr attr;
754 timestruc_t now;
755 int err;
756
757 ASSERT(vp);
758 gethrestime(&now);
759 if (mask & AT_CTIME)
760 attr.va_ctime = now;
761 if (mask & AT_MTIME)
762 attr.va_mtime = now;
763 if (mask & AT_ATIME)
764 attr.va_atime = now;
765
766 attr.va_mask = (mask & AT_TIMES);
767 err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
768 if (err && (err != EROFS)) {
769 sdcmn_err(("update timestamps error %d\n", err));
770 }
771 }
772
/*
 * Tear down a sdev_node and return it to the sdev_node cache.
 *
 * The node must have no links left (sdev_nlink == 0).  Everything the node
 * owns is released: the hold on the backing store vnode, the in-memory
 * attributes, the name, symlink and path strings, the profile (non-global
 * nodes), the hold on the origin node, the AVL tree (directories) and the
 * lookup mutex/cv.
 *
 * the backing store vnode is released here
 */
/*ARGSUSED1*/
void
sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
{
	/* no references */
	ASSERT(dv->sdev_nlink == 0);

	if (dv->sdev_attrvp != NULLVP) {
		VN_RELE(dv->sdev_attrvp);
		/*
		 * reset the attrvp so that no more
		 * references can be made on this already
		 * vn_rele() vnode
		 */
		dv->sdev_attrvp = NULLVP;
	}

	if (dv->sdev_attr != NULL) {
		kmem_free(dv->sdev_attr, sizeof (struct vattr));
		dv->sdev_attr = NULL;
	}

	if (dv->sdev_name != NULL) {
		/* sdev_namelen excludes the terminating '\0' */
		kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
		dv->sdev_name = NULL;
	}

	if (dv->sdev_symlink != NULL) {
		kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
		dv->sdev_symlink = NULL;
	}

	if (dv->sdev_path) {
		kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
		dv->sdev_path = NULL;
	}

	if (!SDEV_IS_GLOBAL(dv)) {
		sdev_prof_free(dv);
		/*
		 * sdev_origin is not released for a VLNK: sdev_nodeready()
		 * stores the link target string there, not a node.
		 */
		if (dv->sdev_vnode->v_type != VLNK && dv->sdev_origin != NULL)
			SDEV_RELE(dv->sdev_origin);
	}

	if (SDEVTOV(dv)->v_type == VDIR) {
		/* the directory must already be empty */
		ASSERT(SDEV_FIRST_ENTRY(dv) == NULL);
		avl_destroy(&dv->sdev_entries);
	}

	mutex_destroy(&dv->sdev_lookup_lock);
	cv_destroy(&dv->sdev_lookup_cv);

	/* return node to initial state as per constructor */
	(void) memset((void *)&dv->sdev_instance_data, 0,
	    sizeof (dv->sdev_instance_data));
	vn_invalid(SDEVTOV(dv));
	dv->sdev_private = NULL;
	kmem_cache_free(sdev_node_cache, dv);
}
834
835 /*
836 * DIRECTORY CACHE lookup
837 */
838 struct sdev_node *
839 sdev_findbyname(struct sdev_node *ddv, char *nm)
840 {
841 struct sdev_node *dv;
842 struct sdev_node dvtmp;
843 avl_index_t where;
844
845 ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
846
847 dvtmp.sdev_name = nm;
848 dv = avl_find(&ddv->sdev_entries, &dvtmp, &where);
849 if (dv) {
850 ASSERT(dv->sdev_dotdot == ddv);
851 ASSERT(strcmp(dv->sdev_name, nm) == 0);
852 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
853 SDEV_HOLD(dv);
854 return (dv);
855 }
856 return (NULL);
857 }
858
/*
 * Inserts a new sdev_node in a parent directory
 *
 * The caller must hold ddv->sdev_contents as a writer.  dv must not be a
 * ZOMBIE and must not be linked anywhere yet (link count 0).  The node's ".."
 * is pointed at ddv and the parent's link count is incremented; dv's own link
 * count is not changed here.  An existing entry of the same name is a fatal
 * error (VERIFY).
 */
void
sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
{
	avl_index_t where;

	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
	ASSERT(ddv->sdev_nlink >= 2);
	ASSERT(dv->sdev_nlink == 0);
	ASSERT(dv->sdev_state != SDEV_ZOMBIE);

	dv->sdev_dotdot = ddv;
	/* avl_find() also yields the insertion point used below */
	VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL);
	avl_insert(&ddv->sdev_entries, dv, where);
	ddv->sdev_nlink++;
}
878
879 /*
880 * The following check is needed because while sdev_nodes are linked
881 * in SDEV_INIT state, they have their link counts incremented only
882 * in SDEV_READY state.
883 */
884 static void
885 decr_link(struct sdev_node *dv)
886 {
887 VERIFY(RW_WRITE_HELD(&dv->sdev_contents));
888 if (dv->sdev_state != SDEV_INIT) {
889 VERIFY(dv->sdev_nlink >= 1);
890 dv->sdev_nlink--;
891 } else {
892 VERIFY(dv->sdev_nlink == 0);
893 }
894 }
895
/*
 * Delete an existing dv from directory cache
 *
 * In the case of a node is still held by non-zero reference count, the node is
 * put into ZOMBIE state. The node is always unlinked from its parent, but it is
 * not destroyed via sdev_inactive until its reference count reaches "0".
 *
 * The caller must hold ddv->sdev_contents as a writer.  dv must not already
 * be a ZOMBIE.  No hold on dv is released here.
 */
static void
sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
{
	struct vnode *vp;
	sdev_node_state_t os;

	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));

	vp = SDEVTOV(dv);
	/* the state change is made under both v_lock and the contents lock */
	mutex_enter(&vp->v_lock);
	rw_enter(&dv->sdev_contents, RW_WRITER);
	os = dv->sdev_state;
	ASSERT(os != SDEV_ZOMBIE);
	dv->sdev_state = SDEV_ZOMBIE;

	/*
	 * unlink ourselves from the parent directory now to take care of the ..
	 * link. However, if we're a directory, we don't remove our reference to
	 * ourself eg. '.' until we are torn down in the inactive callback.
	 */
	decr_link(ddv);
	avl_remove(&ddv->sdev_entries, dv);
	/*
	 * sdev_inactive expects nodes to have a link to themselves when we're
	 * tearing them down. If we're transitioning from the initial state to
	 * zombie and not via ready, then we're not going to have this link that
	 * comes from the node being ready. As a result, we need to increment
	 * our link count by one to account for this.
	 */
	if (os == SDEV_INIT && dv->sdev_nlink == 0)
		dv->sdev_nlink++;
	rw_exit(&dv->sdev_contents);
	mutex_exit(&vp->v_lock);
}
937
938 /*
939 * check if the source is in the path of the target
940 *
941 * source and target are different
942 */
943 /*ARGSUSED2*/
944 static int
945 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
946 {
947 int error = 0;
948 struct sdev_node *dotdot, *dir;
949
950 dotdot = tdv->sdev_dotdot;
951 ASSERT(dotdot);
952
953 /* fs root */
954 if (dotdot == tdv) {
955 return (0);
956 }
957
958 for (;;) {
959 /*
960 * avoid error cases like
961 * mv a a/b
962 * mv a a/b/c
963 * etc.
964 */
965 if (dotdot == sdv) {
966 error = EINVAL;
967 break;
968 }
969
970 dir = dotdot;
971 dotdot = dir->sdev_dotdot;
972
973 /* done checking because root is reached */
974 if (dir == dotdot) {
975 break;
976 }
977 }
978 return (error);
979 }
980
981 int
982 sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
983 struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
984 struct cred *cred)
985 {
986 int error = 0;
987 struct vnode *ovp = SDEVTOV(odv);
988 struct vnode *nvp;
989 struct vattr vattr;
990 int doingdir = (ovp->v_type == VDIR);
991 char *link = NULL;
992 int samedir = (oddv == nddv) ? 1 : 0;
993 int bkstore = 0;
994 struct sdev_node *idv = NULL;
995 struct sdev_node *ndv = NULL;
996 timestruc_t now;
997
998 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
999 error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
1000 if (error)
1001 return (error);
1002
1003 if (!samedir)
1004 rw_enter(&oddv->sdev_contents, RW_WRITER);
1005 rw_enter(&nddv->sdev_contents, RW_WRITER);
1006
1007 /*
1008 * the source may have been deleted by another thread before
1009 * we gets here.
1010 */
1011 if (odv->sdev_state != SDEV_READY) {
1012 error = ENOENT;
1013 goto err_out;
1014 }
1015
1016 if (doingdir && (odv == nddv)) {
1017 error = EINVAL;
1018 goto err_out;
1019 }
1020
1021 /*
1022 * If renaming a directory, and the parents are different (".." must be
1023 * changed) then the source dir must not be in the dir hierarchy above
1024 * the target since it would orphan everything below the source dir.
1025 */
1026 if (doingdir && (oddv != nddv)) {
1027 error = sdev_checkpath(odv, nddv, cred);
1028 if (error)
1029 goto err_out;
1030 }
1031
1032 /* fix the source for a symlink */
1033 if (vattr.va_type == VLNK) {
1034 if (odv->sdev_symlink == NULL) {
1035 error = sdev_follow_link(odv);
1036 if (error) {
1037 /*
1038 * The underlying symlink doesn't exist. This
1039 * node probably shouldn't even exist. While
1040 * it's a bit jarring to consumers, we're going
1041 * to remove the node from /dev.
1042 */
1043 if (SDEV_IS_PERSIST((*ndvp)))
1044 bkstore = 1;
1045 sdev_dirdelete(oddv, odv);
1046 if (bkstore) {
1047 ASSERT(nddv->sdev_attrvp);
1048 error = VOP_REMOVE(nddv->sdev_attrvp,
1049 nnm, cred, NULL, 0);
1050 if (error)
1051 goto err_out;
1052 }
1053 error = ENOENT;
1054 goto err_out;
1055 }
1056 }
1057 ASSERT(odv->sdev_symlink);
1058 link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
1059 }
1060
1061 /* destination existing */
1062 if (*ndvp) {
1063 nvp = SDEVTOV(*ndvp);
1064 ASSERT(nvp);
1065
1066 /* handling renaming to itself */
1067 if (odv == *ndvp) {
1068 error = 0;
1069 goto err_out;
1070 }
1071
1072 if (nvp->v_type == VDIR) {
1073 if (!doingdir) {
1074 error = EISDIR;
1075 goto err_out;
1076 }
1077
1078 if (vn_vfswlock(nvp)) {
1079 error = EBUSY;
1080 goto err_out;
1081 }
1082
1083 if (vn_mountedvfs(nvp) != NULL) {
1084 vn_vfsunlock(nvp);
1085 error = EBUSY;
1086 goto err_out;
1087 }
1088
1089 /* in case dir1 exists in dir2 and "mv dir1 dir2" */
1090 if ((*ndvp)->sdev_nlink > 2) {
1091 vn_vfsunlock(nvp);
1092 error = EEXIST;
1093 goto err_out;
1094 }
1095 vn_vfsunlock(nvp);
1096
1097 /*
1098 * We did not place the hold on *ndvp, so even though
1099 * we're deleting the node, we should not get rid of our
1100 * reference.
1101 */
1102 sdev_dirdelete(nddv, *ndvp);
1103 *ndvp = NULL;
1104 ASSERT(nddv->sdev_attrvp);
1105 error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
1106 nddv->sdev_attrvp, cred, NULL, 0);
1107 if (error)
1108 goto err_out;
1109 } else {
1110 if (doingdir) {
1111 error = ENOTDIR;
1112 goto err_out;
1113 }
1114
1115 if (SDEV_IS_PERSIST((*ndvp))) {
1116 bkstore = 1;
1117 }
1118
1119 /*
1120 * Get rid of the node from the directory cache note.
1121 * Don't forget that it's not up to us to remove the vn
1122 * ref on the sdev node, as we did not place it.
1123 */
1124 sdev_dirdelete(nddv, *ndvp);
1125 *ndvp = NULL;
1126 if (bkstore) {
1127 ASSERT(nddv->sdev_attrvp);
1128 error = VOP_REMOVE(nddv->sdev_attrvp,
1129 nnm, cred, NULL, 0);
1130 if (error)
1131 goto err_out;
1132 }
1133 }
1134 }
1135
1136 /*
1137 * make a fresh node from the source attrs
1138 */
1139 ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
1140 error = sdev_mknode(nddv, nnm, ndvp, &vattr,
1141 NULL, (void *)link, cred, SDEV_READY);
1142
1143 if (link != NULL) {
1144 kmem_free(link, strlen(link) + 1);
1145 link = NULL;
1146 }
1147
1148 if (error)
1149 goto err_out;
1150 ASSERT(*ndvp);
1151 ASSERT((*ndvp)->sdev_state == SDEV_READY);
1152
1153 /* move dir contents */
1154 if (doingdir) {
1155 for (idv = SDEV_FIRST_ENTRY(odv); idv;
1156 idv = SDEV_NEXT_ENTRY(odv, idv)) {
1157 SDEV_HOLD(idv);
1158 error = sdev_rnmnode(odv, idv,
1159 (struct sdev_node *)(*ndvp), &ndv,
1160 idv->sdev_name, cred);
1161 SDEV_RELE(idv);
1162 if (error)
1163 goto err_out;
1164 ndv = NULL;
1165 }
1166 }
1167
1168 if ((*ndvp)->sdev_attrvp) {
1169 sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
1170 AT_CTIME|AT_ATIME);
1171 } else {
1172 ASSERT((*ndvp)->sdev_attr);
1173 gethrestime(&now);
1174 (*ndvp)->sdev_attr->va_ctime = now;
1175 (*ndvp)->sdev_attr->va_atime = now;
1176 }
1177
1178 if (nddv->sdev_attrvp) {
1179 sdev_update_timestamps(nddv->sdev_attrvp, kcred,
1180 AT_MTIME|AT_ATIME);
1181 } else {
1182 ASSERT(nddv->sdev_attr);
1183 gethrestime(&now);
1184 nddv->sdev_attr->va_mtime = now;
1185 nddv->sdev_attr->va_atime = now;
1186 }
1187 rw_exit(&nddv->sdev_contents);
1188 if (!samedir)
1189 rw_exit(&oddv->sdev_contents);
1190
1191 SDEV_RELE(*ndvp);
1192 return (error);
1193
1194 err_out:
1195 if (link != NULL) {
1196 kmem_free(link, strlen(link) + 1);
1197 link = NULL;
1198 }
1199
1200 rw_exit(&nddv->sdev_contents);
1201 if (!samedir)
1202 rw_exit(&oddv->sdev_contents);
1203 return (error);
1204 }
1205
1206 /*
1207 * Merge sdev_node specific information into an attribute structure.
1208 *
1209 * note: sdev_node is not locked here
1210 */
1211 void
1212 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
1213 {
1214 struct vnode *vp = SDEVTOV(dv);
1215
1216 vap->va_nlink = dv->sdev_nlink;
1217 vap->va_nodeid = dv->sdev_ino;
1218 vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
1219 vap->va_type = vp->v_type;
1220
1221 if (vp->v_type == VDIR) {
1222 vap->va_rdev = 0;
1223 vap->va_fsid = vp->v_rdev;
1224 } else if (vp->v_type == VLNK) {
1225 vap->va_rdev = 0;
1226 vap->va_mode &= ~S_IFMT;
1227 vap->va_mode |= S_IFLNK;
1228 } else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
1229 vap->va_rdev = vp->v_rdev;
1230 vap->va_mode &= ~S_IFMT;
1231 if (vap->va_type == VCHR)
1232 vap->va_mode |= S_IFCHR;
1233 else
1234 vap->va_mode |= S_IFBLK;
1235 } else {
1236 vap->va_rdev = 0;
1237 }
1238 }
1239
1240 struct vattr *
1241 sdev_getdefault_attr(enum vtype type)
1242 {
1243 if (type == VDIR)
1244 return (&sdev_vattr_dir);
1245 else if (type == VCHR)
1246 return (&sdev_vattr_chr);
1247 else if (type == VBLK)
1248 return (&sdev_vattr_blk);
1249 else if (type == VLNK)
1250 return (&sdev_vattr_lnk);
1251 else
1252 return (NULL);
1253 }
1254 int
1255 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
1256 {
1257 int rv = 0;
1258 struct vnode *vp = SDEVTOV(dv);
1259
1260 switch (vp->v_type) {
1261 case VCHR:
1262 case VBLK:
1263 /*
1264 * If vnode is a device, return special vnode instead
1265 * (though it knows all about -us- via sp->s_realvp)
1266 */
1267 *vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
1268 VN_RELE(vp);
1269 if (*vpp == NULLVP)
1270 rv = ENOSYS;
1271 break;
1272 default: /* most types are returned as is */
1273 *vpp = vp;
1274 break;
1275 }
1276 return (rv);
1277 }
1278
1279 /*
1280 * junction between devname and root file system, e.g. ufs
1281 */
1282 int
1283 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
1284 {
1285 struct vnode *rdvp = ddv->sdev_attrvp;
1286 int rval = 0;
1287
1288 ASSERT(rdvp);
1289
1290 rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
1291 NULL);
1292 return (rval);
1293 }
1294
1295 static int
1296 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
1297 {
1298 struct sdev_node *dv = NULL;
1299 char *nm;
1300 struct vnode *dirvp;
1301 int error;
1302 vnode_t *vp;
1303 int eof;
1304 struct iovec iov;
1305 struct uio uio;
1306 struct dirent64 *dp;
1307 dirent64_t *dbuf;
1308 size_t dbuflen;
1309 struct vattr vattr;
1310 char *link = NULL;
1311
1312 if (ddv->sdev_attrvp == NULL)
1313 return (0);
1314 if (!(ddv->sdev_flags & SDEV_BUILD))
1315 return (0);
1316
1317 dirvp = ddv->sdev_attrvp;
1318 VN_HOLD(dirvp);
1319 dbuf = kmem_zalloc(dlen, KM_SLEEP);
1320
1321 uio.uio_iov = &iov;
1322 uio.uio_iovcnt = 1;
1323 uio.uio_segflg = UIO_SYSSPACE;
1324 uio.uio_fmode = 0;
1325 uio.uio_extflg = UIO_COPY_CACHED;
1326 uio.uio_loffset = 0;
1327 uio.uio_llimit = MAXOFFSET_T;
1328
1329 eof = 0;
1330 error = 0;
1331 while (!error && !eof) {
1332 uio.uio_resid = dlen;
1333 iov.iov_base = (char *)dbuf;
1334 iov.iov_len = dlen;
1335 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1336 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1337 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1338
1339 dbuflen = dlen - uio.uio_resid;
1340 if (error || dbuflen == 0)
1341 break;
1342
1343 if (!(ddv->sdev_flags & SDEV_BUILD))
1344 break;
1345
1346 for (dp = dbuf; ((intptr_t)dp <
1347 (intptr_t)dbuf + dbuflen);
1348 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1349 nm = dp->d_name;
1350
1351 if (strcmp(nm, ".") == 0 ||
1352 strcmp(nm, "..") == 0)
1353 continue;
1354
1355 vp = NULLVP;
1356 dv = sdev_cache_lookup(ddv, nm);
1357 if (dv) {
1358 VERIFY(dv->sdev_state != SDEV_ZOMBIE);
1359 SDEV_SIMPLE_RELE(dv);
1360 continue;
1361 }
1362
1363 /* refill the cache if not already */
1364 error = devname_backstore_lookup(ddv, nm, &vp);
1365 if (error)
1366 continue;
1367
1368 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1369 error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
1370 if (error)
1371 continue;
1372
1373 if (vattr.va_type == VLNK) {
1374 error = sdev_getlink(vp, &link);
1375 if (error) {
1376 continue;
1377 }
1378 ASSERT(link != NULL);
1379 }
1380
1381 if (!rw_tryupgrade(&ddv->sdev_contents)) {
1382 rw_exit(&ddv->sdev_contents);
1383 rw_enter(&ddv->sdev_contents, RW_WRITER);
1384 }
1385 error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
1386 cred, SDEV_READY);
1387 rw_downgrade(&ddv->sdev_contents);
1388
1389 if (link != NULL) {
1390 kmem_free(link, strlen(link) + 1);
1391 link = NULL;
1392 }
1393
1394 if (!error) {
1395 ASSERT(dv);
1396 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1397 SDEV_SIMPLE_RELE(dv);
1398 }
1399 vp = NULL;
1400 dv = NULL;
1401 }
1402 }
1403
1404 done:
1405 VN_RELE(dirvp);
1406 kmem_free(dbuf, dlen);
1407
1408 return (error);
1409 }
1410
1411 void
1412 sdev_filldir_dynamic(struct sdev_node *ddv)
1413 {
1414 int error;
1415 int i;
1416 struct vattr vattr;
1417 struct vattr *vap = &vattr;
1418 char *nm = NULL;
1419 struct sdev_node *dv = NULL;
1420
1421 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1422 ASSERT((ddv->sdev_flags & SDEV_BUILD));
1423
1424 *vap = *sdev_getdefault_attr(VDIR); /* note structure copy here */
1425 gethrestime(&vap->va_atime);
1426 vap->va_mtime = vap->va_atime;
1427 vap->va_ctime = vap->va_atime;
1428 for (i = 0; vtab[i].vt_name != NULL; i++) {
1429 /*
1430 * This early, we may be in a read-only /dev environment: leave
1431 * the creation of any nodes we'd attempt to persist to
1432 * devfsadm. Because /dev itself is normally persistent, any
1433 * node which is not marked dynamic will end up being marked
1434 * persistent. However, some nodes are both dynamic and
1435 * persistent, mostly lofi and rlofi, so we need to be careful
1436 * in our check.
1437 */
1438 if ((vtab[i].vt_flags & SDEV_PERSIST) ||
1439 !(vtab[i].vt_flags & SDEV_DYNAMIC))
1440 continue;
1441 nm = vtab[i].vt_name;
1442 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1443 dv = NULL;
1444 error = sdev_mknode(ddv, nm, &dv, vap, NULL,
1445 NULL, kcred, SDEV_READY);
1446 if (error) {
1447 cmn_err(CE_WARN, "%s/%s: error %d\n",
1448 ddv->sdev_name, nm, error);
1449 } else {
1450 ASSERT(dv);
1451 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1452 SDEV_SIMPLE_RELE(dv);
1453 }
1454 }
1455 }
1456
1457 /*
1458 * Creating a backing store entry based on sdev_attr.
1459 * This is called either as part of node creation in a persistent directory
1460 * or from setattr/setsecattr to persist access attributes across reboot.
1461 */
1462 int
1463 sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
1464 {
1465 int error = 0;
1466 struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
1467 struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
1468 struct vattr *vap = dv->sdev_attr;
1469 char *nm = dv->sdev_name;
1470 struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;
1471
1472 ASSERT(dv && dv->sdev_name && rdvp);
1473 ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);
1474
1475 lookup:
1476 /* try to find it in the backing store */
1477 error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
1478 NULL);
1479 if (error == 0) {
1480 if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
1481 VN_HOLD(rrvp);
1482 VN_RELE(*rvp);
1483 *rvp = rrvp;
1484 }
1485
1486 kmem_free(dv->sdev_attr, sizeof (vattr_t));
1487 dv->sdev_attr = NULL;
1488 dv->sdev_attrvp = *rvp;
1489 return (0);
1490 }
1491
1492 /* let's try to persist the node */
1493 gethrestime(&vap->va_atime);
1494 vap->va_mtime = vap->va_atime;
1495 vap->va_ctime = vap->va_atime;
1496 vap->va_mask |= AT_TYPE|AT_MODE;
1497 switch (vap->va_type) {
1498 case VDIR:
1499 error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
1500 sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
1501 (void *)(*rvp), error));
1502 if (!error)
1503 VN_RELE(*rvp);
1504 break;
1505 case VCHR:
1506 case VBLK:
1507 case VREG:
1508 case VDOOR:
1509 error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
1510 rvp, cred, 0, NULL, NULL);
1511 sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
1512 (void *)(*rvp), error));
1513 if (!error)
1514 VN_RELE(*rvp);
1515 break;
1516 case VLNK:
1517 ASSERT(dv->sdev_symlink);
1518 error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred,
1519 NULL, 0);
1520 sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
1521 error));
1522 break;
1523 default:
1524 cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
1525 "create\n", nm);
1526 /*NOTREACHED*/
1527 }
1528
1529 /* go back to lookup to factor out spec node and set attrvp */
1530 if (error == 0)
1531 goto lookup;
1532
1533 sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error));
1534 return (error);
1535 }
1536
1537 static void
1538 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
1539 {
1540 struct sdev_node *dup = NULL;
1541
1542 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1543 if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
1544 sdev_direnter(ddv, *dv);
1545 } else {
1546 VERIFY(dup->sdev_state != SDEV_ZOMBIE);
1547 SDEV_SIMPLE_RELE(*dv);
1548 sdev_nodedestroy(*dv, 0);
1549 *dv = dup;
1550 }
1551 }
1552
/*
 * Remove *dv from the directory cache of ddv by calling sdev_dirdelete().
 * The caller holds ddv->sdev_contents as writer.
 */
static void
sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
{
	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
	sdev_dirdelete(ddv, *dv);
}
1559
1560 /*
1561 * update the in-core directory cache
1562 */
1563 void
1564 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
1565 sdev_cache_ops_t ops)
1566 {
1567 ASSERT((SDEV_HELD(*dv)));
1568
1569 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1570 switch (ops) {
1571 case SDEV_CACHE_ADD:
1572 sdev_cache_add(ddv, dv, nm);
1573 break;
1574 case SDEV_CACHE_DELETE:
1575 sdev_cache_delete(ddv, dv);
1576 break;
1577 default:
1578 break;
1579 }
1580 }
1581
1582 /*
1583 * retrieve the named entry from the directory cache
1584 */
1585 struct sdev_node *
1586 sdev_cache_lookup(struct sdev_node *ddv, char *nm)
1587 {
1588 struct sdev_node *dv = NULL;
1589
1590 ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1591 dv = sdev_findbyname(ddv, nm);
1592
1593 return (dv);
1594 }
1595
1596 /*
1597 * Implicit reconfig for nodes constructed by a link generator
1598 * Start devfsadm if needed, or if devfsadm is in progress,
1599 * prepare to block on devfsadm either completing or
1600 * constructing the desired node. As devfsadmd is global
1601 * in scope, constructing all necessary nodes, we only
1602 * need to initiate it once.
1603 */
1604 static int
1605 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
1606 {
1607 int error = 0;
1608
1609 if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
1610 sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
1611 ddv->sdev_name, nm, devfsadm_state));
1612 mutex_enter(&dv->sdev_lookup_lock);
1613 SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
1614 mutex_exit(&dv->sdev_lookup_lock);
1615 error = 0;
1616 } else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
1617 sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
1618 ddv->sdev_name, nm, devfsadm_state));
1619
1620 sdev_devfsadmd_thread(ddv, dv, kcred);
1621 mutex_enter(&dv->sdev_lookup_lock);
1622 SDEV_BLOCK_OTHERS(dv,
1623 (SDEV_LOOKUP | SDEV_LGWAITING));
1624 mutex_exit(&dv->sdev_lookup_lock);
1625 error = 0;
1626 } else {
1627 error = -1;
1628 }
1629
1630 return (error);
1631 }
1632
1633 /*
1634 * Support for specialized device naming construction mechanisms
1635 */
1636 static int
1637 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1638 int (*callback)(struct sdev_node *, char *, void **, struct cred *,
1639 void *, char *), int flags, struct cred *cred)
1640 {
1641 int rv = 0;
1642 char *physpath = NULL;
1643 struct vattr vattr;
1644 struct vattr *vap = &vattr;
1645 struct sdev_node *dv = NULL;
1646
1647 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1648 if (flags & SDEV_VLINK) {
1649 physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1650 rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
1651 NULL);
1652 if (rv) {
1653 kmem_free(physpath, MAXPATHLEN);
1654 return (-1);
1655 }
1656
1657 *vap = *sdev_getdefault_attr(VLNK); /* structure copy */
1658 vap->va_size = strlen(physpath);
1659 gethrestime(&vap->va_atime);
1660 vap->va_mtime = vap->va_atime;
1661 vap->va_ctime = vap->va_atime;
1662
1663 rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
1664 (void *)physpath, cred, SDEV_READY);
1665 kmem_free(physpath, MAXPATHLEN);
1666 if (rv)
1667 return (rv);
1668 } else if (flags & SDEV_VATTR) {
1669 /*
1670 * /dev/pts
1671 *
1672 * callback is responsible to set the basic attributes,
1673 * e.g. va_type/va_uid/va_gid/
1674 * dev_t if VCHR or VBLK/
1675 */
1676 ASSERT(callback);
1677 rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
1678 if (rv) {
1679 sdcmn_err3(("devname_lookup_func: SDEV_NONE "
1680 "callback failed \n"));
1681 return (-1);
1682 }
1683
1684 rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
1685 cred, SDEV_READY);
1686
1687 if (rv)
1688 return (rv);
1689
1690 } else {
1691 impossible(("lookup: %s/%s by %s not supported (%d)\n",
1692 SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
1693 __LINE__));
1694 rv = -1;
1695 }
1696
1697 *dvp = dv;
1698 return (rv);
1699 }
1700
/*
 * Return 1 if exec_name is exactly "devfsadm", 0 otherwise.
 */
static int
is_devfsadm_thread(char *exec_name)
{
	/*
	 * note: because devfsadmd -> /usr/sbin/devfsadm
	 * it is safe to use "devfsadm" to capture the lookups
	 * from devfsadm and its daemon version.
	 */
	return (strcmp(exec_name, "devfsadm") == 0);
}
1713
1714 /*
1715 * Lookup Order:
1716 * sdev_node cache;
1717 * backing store (SDEV_PERSIST);
1718 * DBNR: a. dir_ops implemented in the loadable modules;
1719 * b. vnode ops in vtab.
1720 */
1721 int
1722 devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
1723 struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
1724 struct cred *, void *, char *), int flags)
1725 {
1726 int rv = 0, nmlen;
1727 struct vnode *rvp = NULL;
1728 struct sdev_node *dv = NULL;
1729 int retried = 0;
1730 int error = 0;
1731 struct vattr vattr;
1732 char *lookup_thread = curproc->p_user.u_comm;
1733 int failed_flags = 0;
1734 int (*vtor)(struct sdev_node *) = NULL;
1735 int state;
1736 int parent_state;
1737 char *link = NULL;
1738
1739 if (SDEVTOV(ddv)->v_type != VDIR)
1740 return (ENOTDIR);
1741
1742 /*
1743 * Empty name or ., return node itself.
1744 */
1745 nmlen = strlen(nm);
1746 if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
1747 *vpp = SDEVTOV(ddv);
1748 VN_HOLD(*vpp);
1749 return (0);
1750 }
1751
1752 /*
1753 * .., return the parent directory
1754 */
1755 if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
1756 *vpp = SDEVTOV(ddv->sdev_dotdot);
1757 VN_HOLD(*vpp);
1758 return (0);
1759 }
1760
1761 rw_enter(&ddv->sdev_contents, RW_READER);
1762 if (ddv->sdev_flags & SDEV_VTOR) {
1763 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
1764 ASSERT(vtor);
1765 }
1766
1767 tryagain:
1768 /*
1769 * (a) directory cache lookup:
1770 */
1771 ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1772 parent_state = ddv->sdev_state;
1773 dv = sdev_cache_lookup(ddv, nm);
1774 if (dv) {
1775 state = dv->sdev_state;
1776 switch (state) {
1777 case SDEV_INIT:
1778 if (is_devfsadm_thread(lookup_thread))
1779 break;
1780
1781 /* ZOMBIED parent won't allow node creation */
1782 if (parent_state == SDEV_ZOMBIE) {
1783 SD_TRACE_FAILED_LOOKUP(ddv, nm,
1784 retried);
1785 goto nolock_notfound;
1786 }
1787
1788 mutex_enter(&dv->sdev_lookup_lock);
1789 /* compensate the threads started after devfsadm */
1790 if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
1791 !(SDEV_IS_LOOKUP(dv)))
1792 SDEV_BLOCK_OTHERS(dv,
1793 (SDEV_LOOKUP | SDEV_LGWAITING));
1794
1795 if (SDEV_IS_LOOKUP(dv)) {
1796 failed_flags |= SLF_REBUILT;
1797 rw_exit(&ddv->sdev_contents);
1798 error = sdev_wait4lookup(dv, SDEV_LOOKUP);
1799 mutex_exit(&dv->sdev_lookup_lock);
1800 rw_enter(&ddv->sdev_contents, RW_READER);
1801
1802 if (error != 0) {
1803 SD_TRACE_FAILED_LOOKUP(ddv, nm,
1804 retried);
1805 goto nolock_notfound;
1806 }
1807
1808 state = dv->sdev_state;
1809 if (state == SDEV_INIT) {
1810 SD_TRACE_FAILED_LOOKUP(ddv, nm,
1811 retried);
1812 goto nolock_notfound;
1813 } else if (state == SDEV_READY) {
1814 goto found;
1815 } else if (state == SDEV_ZOMBIE) {
1816 rw_exit(&ddv->sdev_contents);
1817 SD_TRACE_FAILED_LOOKUP(ddv, nm,
1818 retried);
1819 SDEV_RELE(dv);
1820 goto lookup_failed;
1821 }
1822 } else {
1823 mutex_exit(&dv->sdev_lookup_lock);
1824 }
1825 break;
1826 case SDEV_READY:
1827 goto found;
1828 case SDEV_ZOMBIE:
1829 rw_exit(&ddv->sdev_contents);
1830 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1831 SDEV_RELE(dv);
1832 goto lookup_failed;
1833 default:
1834 rw_exit(&ddv->sdev_contents);
1835 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1836 sdev_lookup_failed(ddv, nm, failed_flags);
1837 *vpp = NULLVP;
1838 return (ENOENT);
1839 }
1840 }
1841 ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1842
1843 /*
1844 * ZOMBIED parent does not allow new node creation.
1845 * bail out early
1846 */
1847 if (parent_state == SDEV_ZOMBIE) {
1848 rw_exit(&ddv->sdev_contents);
1849 *vpp = NULLVP;
1850 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1851 return (ENOENT);
1852 }
1853
1854 /*
1855 * (b0): backing store lookup
1856 * SDEV_PERSIST is default except:
1857 * 1) pts nodes
1858 * 2) non-chmod'ed local nodes
1859 * 3) zvol nodes
1860 */
1861 if (SDEV_IS_PERSIST(ddv)) {
1862 error = devname_backstore_lookup(ddv, nm, &rvp);
1863
1864 if (!error) {
1865
1866 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1867 error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL);
1868 if (error) {
1869 rw_exit(&ddv->sdev_contents);
1870 if (dv)
1871 SDEV_RELE(dv);
1872 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1873 sdev_lookup_failed(ddv, nm, failed_flags);
1874 *vpp = NULLVP;
1875 return (ENOENT);
1876 }
1877
1878 if (vattr.va_type == VLNK) {
1879 error = sdev_getlink(rvp, &link);
1880 if (error) {
1881 rw_exit(&ddv->sdev_contents);
1882 if (dv)
1883 SDEV_RELE(dv);
1884 SD_TRACE_FAILED_LOOKUP(ddv, nm,
1885 retried);
1886 sdev_lookup_failed(ddv, nm,
1887 failed_flags);
1888 *vpp = NULLVP;
1889 return (ENOENT);
1890 }
1891 ASSERT(link != NULL);
1892 }
1893
1894 if (!rw_tryupgrade(&ddv->sdev_contents)) {
1895 rw_exit(&ddv->sdev_contents);
1896 rw_enter(&ddv->sdev_contents, RW_WRITER);
1897 }
1898 error = sdev_mknode(ddv, nm, &dv, &vattr,
1899 rvp, link, cred, SDEV_READY);
1900 rw_downgrade(&ddv->sdev_contents);
1901
1902 if (link != NULL) {
1903 kmem_free(link, strlen(link) + 1);
1904 link = NULL;
1905 }
1906
1907 if (error) {
1908 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1909 rw_exit(&ddv->sdev_contents);
1910 if (dv)
1911 SDEV_RELE(dv);
1912 goto lookup_failed;
1913 } else {
1914 goto found;
1915 }
1916 } else if (retried) {
1917 rw_exit(&ddv->sdev_contents);
1918 sdcmn_err3(("retry of lookup of %s/%s: failed\n",
1919 ddv->sdev_name, nm));
1920 if (dv)
1921 SDEV_RELE(dv);
1922 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1923 sdev_lookup_failed(ddv, nm, failed_flags);
1924 *vpp = NULLVP;
1925 return (ENOENT);
1926 }
1927 }
1928
1929 lookup_create_node:
1930 /* first thread that is doing the lookup on this node */
1931 if (callback) {
1932 ASSERT(dv == NULL);
1933 if (!rw_tryupgrade(&ddv->sdev_contents)) {
1934 rw_exit(&ddv->sdev_contents);
1935 rw_enter(&ddv->sdev_contents, RW_WRITER);
1936 }
1937 error = sdev_call_dircallback(ddv, &dv, nm, callback,
1938 flags, cred);
1939 rw_downgrade(&ddv->sdev_contents);
1940 if (error == 0) {
1941 goto found;
1942 } else {
1943 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1944 rw_exit(&ddv->sdev_contents);
1945 goto lookup_failed;
1946 }
1947 }
1948 if (!dv) {
1949 if (!rw_tryupgrade(&ddv->sdev_contents)) {
1950 rw_exit(&ddv->sdev_contents);
1951 rw_enter(&ddv->sdev_contents, RW_WRITER);
1952 }
1953 error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
1954 cred, SDEV_INIT);
1955 if (!dv) {
1956 rw_exit(&ddv->sdev_contents);
1957 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1958 sdev_lookup_failed(ddv, nm, failed_flags);
1959 *vpp = NULLVP;
1960 return (ENOENT);
1961 }
1962 rw_downgrade(&ddv->sdev_contents);
1963 }
1964
1965 /*
1966 * (b1) invoking devfsadm once per life time for devfsadm nodes
1967 */
1968 ASSERT(SDEV_HELD(dv));
1969
1970 if (SDEV_IS_NO_NCACHE(dv))
1971 failed_flags |= SLF_NO_NCACHE;
1972 if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
1973 SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
1974 ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
1975 ASSERT(SDEV_HELD(dv));
1976 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1977 goto nolock_notfound;
1978 }
1979
1980 /*
1981 * filter out known non-existent devices recorded
1982 * during initial reconfiguration boot for which
1983 * reconfig should not be done and lookup may
1984 * be short-circuited now.
1985 */
1986 if (sdev_lookup_filter(ddv, nm)) {
1987 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1988 goto nolock_notfound;
1989 }
1990
1991 /* bypassing devfsadm internal nodes */
1992 if (is_devfsadm_thread(lookup_thread)) {
1993 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1994 goto nolock_notfound;
1995 }
1996
1997 if (sdev_reconfig_disable) {
1998 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1999 goto nolock_notfound;
2000 }
2001
2002 error = sdev_call_devfsadmd(ddv, dv, nm);
2003 if (error == 0) {
2004 sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
2005 ddv->sdev_name, nm, curproc->p_user.u_comm));
2006 if (sdev_reconfig_verbose) {
2007 cmn_err(CE_CONT,
2008 "?lookup of %s/%s by %s: reconfig\n",
2009 ddv->sdev_name, nm, curproc->p_user.u_comm);
2010 }
2011 retried = 1;
2012 failed_flags |= SLF_REBUILT;
2013 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
2014 SDEV_SIMPLE_RELE(dv);
2015 goto tryagain;
2016 } else {
2017 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2018 goto nolock_notfound;
2019 }
2020
2021 found:
2022 ASSERT(dv->sdev_state == SDEV_READY);
2023 if (vtor) {
2024 /*
2025 * Check validity of returned node
2026 */
2027 switch (vtor(dv)) {
2028 case SDEV_VTOR_VALID:
2029 break;
2030 case SDEV_VTOR_STALE:
2031 /*
2032 * The name exists, but the cache entry is
2033 * stale and needs to be re-created.
2034 */
2035 ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2036 if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
2037 rw_exit(&ddv->sdev_contents);
2038 rw_enter(&ddv->sdev_contents, RW_WRITER);
2039 }
2040 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE);
2041 rw_downgrade(&ddv->sdev_contents);
2042 SDEV_RELE(dv);
2043 dv = NULL;
2044 goto lookup_create_node;
2045 /* FALLTHRU */
2046 case SDEV_VTOR_INVALID:
2047 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2048 sdcmn_err7(("lookup: destroy invalid "
2049 "node: %s(%p)\n", dv->sdev_name, (void *)dv));
2050 goto nolock_notfound;
2051 case SDEV_VTOR_SKIP:
2052 sdcmn_err7(("lookup: node not applicable - "
2053 "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
2054 rw_exit(&ddv->sdev_contents);
2055 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2056 SDEV_RELE(dv);
2057 goto lookup_failed;
2058 default:
2059 cmn_err(CE_PANIC,
2060 "dev fs: validator failed: %s(%p)\n",
2061 dv->sdev_name, (void *)dv);
2062 break;
2063 }
2064 }
2065
2066 rw_exit(&ddv->sdev_contents);
2067 rv = sdev_to_vp(dv, vpp);
2068 sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
2069 "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
2070 dv->sdev_state, nm, rv));
2071 return (rv);
2072
2073 nolock_notfound:
2074 /*
2075 * Destroy the node that is created for synchronization purposes.
2076 */
2077 sdcmn_err3(("devname_lookup_func: %s with state %d\n",
2078 nm, dv->sdev_state));
2079 ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2080 if (dv->sdev_state == SDEV_INIT) {
2081 if (!rw_tryupgrade(&ddv->sdev_contents)) {
2082 rw_exit(&ddv->sdev_contents);
2083 rw_enter(&ddv->sdev_contents, RW_WRITER);
2084 }
2085
2086 /*
2087 * Node state may have changed during the lock
2088 * changes. Re-check.
2089 */
2090 if (dv->sdev_state == SDEV_INIT) {
2091 sdev_dirdelete(ddv, dv);
2092 rw_exit(&ddv->sdev_contents);
2093 sdev_lookup_failed(ddv, nm, failed_flags);
2094 SDEV_RELE(dv);
2095 *vpp = NULL;
2096 return (ENOENT);
2097 }
2098 }
2099
2100 rw_exit(&ddv->sdev_contents);
2101 SDEV_RELE(dv);
2102
2103 lookup_failed:
2104 sdev_lookup_failed(ddv, nm, failed_flags);
2105 *vpp = NULL;
2106 return (ENOENT);
2107 }
2108
2109 /*
2110 * Given a directory node, mark all nodes beneath as
2111 * STALE, i.e. nodes that don't exist as far as new
2112 * consumers are concerned. Remove them from the
2113 * list of directory entries so that no lookup or
2114 * directory traversal will find them. The node
2115 * not deallocated so existing holds are not affected.
2116 */
2117 void
2118 sdev_stale(struct sdev_node *ddv)
2119 {
2120 struct sdev_node *dv;
2121 struct vnode *vp;
2122
2123 ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2124
2125 rw_enter(&ddv->sdev_contents, RW_WRITER);
2126 while ((dv = SDEV_FIRST_ENTRY(ddv)) != NULL) {
2127 vp = SDEVTOV(dv);
2128 SDEV_HOLD(dv);
2129 if (vp->v_type == VDIR)
2130 sdev_stale(dv);
2131
2132 sdev_dirdelete(ddv, dv);
2133 SDEV_RELE(dv);
2134 }
2135 ddv->sdev_flags |= SDEV_BUILD;
2136 rw_exit(&ddv->sdev_contents);
2137 }
2138
2139 /*
2140 * Given a directory node, clean out all the nodes beneath.
2141 * If expr is specified, clean node with names matching expr.
2142 * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
2143 * so they are excluded from future lookups.
2144 */
2145 int
2146 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
2147 {
2148 int error = 0;
2149 int busy = 0;
2150 struct vnode *vp;
2151 struct sdev_node *dv, *next;
2152 int bkstore = 0;
2153 int len = 0;
2154 char *bks_name = NULL;
2155
2156 ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2157
2158 /*
2159 * We try our best to destroy all unused sdev_node's
2160 */
2161 rw_enter(&ddv->sdev_contents, RW_WRITER);
2162 for (dv = SDEV_FIRST_ENTRY(ddv); dv != NULL; dv = next) {
2163 next = SDEV_NEXT_ENTRY(ddv, dv);
2164 vp = SDEVTOV(dv);
2165
2166 if (expr && gmatch(dv->sdev_name, expr) == 0)
2167 continue;
2168
2169 if (vp->v_type == VDIR &&
2170 sdev_cleandir(dv, NULL, flags) != 0) {
2171 sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2172 dv->sdev_name));
2173 busy++;
2174 continue;
2175 }
2176
2177 if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
2178 sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2179 dv->sdev_name));
2180 busy++;
2181 continue;
2182 }
2183
2184 /*
2185 * at this point, either dv is not held or SDEV_ENFORCE
2186 * is specified. In either case, dv needs to be deleted
2187 */
2188 SDEV_HOLD(dv);
2189
2190 bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
2191 if (bkstore && (vp->v_type == VDIR))
2192 bkstore += 1;
2193
2194 if (bkstore) {
2195 len = strlen(dv->sdev_name) + 1;
2196 bks_name = kmem_alloc(len, KM_SLEEP);
2197 bcopy(dv->sdev_name, bks_name, len);
2198 }
2199
2200 sdev_dirdelete(ddv, dv);
2201
2202 /* take care the backing store clean up */
2203 if (bkstore) {
2204 ASSERT(bks_name);
2205 ASSERT(ddv->sdev_attrvp);
2206
2207 if (bkstore == 1) {
2208 error = VOP_REMOVE(ddv->sdev_attrvp,
2209 bks_name, kcred, NULL, 0);
2210 } else if (bkstore == 2) {
2211 error = VOP_RMDIR(ddv->sdev_attrvp,
2212 bks_name, ddv->sdev_attrvp, kcred, NULL, 0);
2213 }
2214
2215 /* do not propagate the backing store errors */
2216 if (error) {
2217 sdcmn_err9(("sdev_cleandir: backing store"
2218 "not cleaned\n"));
2219 error = 0;
2220 }
2221
2222 bkstore = 0;
2223 kmem_free(bks_name, len);
2224 bks_name = NULL;
2225 len = 0;
2226 }
2227
2228 ddv->sdev_flags |= SDEV_BUILD;
2229 SDEV_RELE(dv);
2230 }
2231
2232 ddv->sdev_flags |= SDEV_BUILD;
2233 rw_exit(&ddv->sdev_contents);
2234
2235 if (busy) {
2236 error = EBUSY;
2237 }
2238
2239 return (error);
2240 }
2241
2242 /*
2243 * a convenient wrapper for readdir() funcs
2244 */
2245 size_t
2246 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
2247 {
2248 size_t reclen = DIRENT64_RECLEN(strlen(nm));
2249 if (reclen > size)
2250 return (0);
2251
2252 de->d_ino = (ino64_t)ino;
2253 de->d_off = (off64_t)off + 1;
2254 de->d_reclen = (ushort_t)reclen;
2255 (void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
2256 return (reclen);
2257 }
2258
2259 /*
2260 * sdev_mount service routines
2261 */
2262 int
2263 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
2264 {
2265 int error;
2266
2267 if (uap->datalen != sizeof (*args))
2268 return (EINVAL);
2269
2270 if (error = copyin(uap->dataptr, args, sizeof (*args))) {
2271 cmn_err(CE_WARN, "sdev_copyin_mountargs: can not"
2272 "get user data. error %d\n", error);
2273 return (EFAULT);
2274 }
2275
2276 return (0);
2277 }
2278
/*
 * nextdp(dp): address of the directory entry that follows dp, computed
 * by advancing d_reclen bytes from dp.  Any earlier definition of the
 * macro is discarded first.
 */
#ifdef nextdp
#undef nextdp
#endif
#define	nextdp(dp)	((struct dirent64 *) \
			(intptr_t)((char *)(dp) + (dp)->d_reclen))
2284
2285 /*
2286 * readdir helper func
2287 */
2288 int
2289 devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
2290 int flags)
2291 {
2292 struct sdev_node *ddv = VTOSDEV(vp);
2293 struct sdev_node *dv;
2294 dirent64_t *dp;
2295 ulong_t outcount = 0;
2296 size_t namelen;
2297 ulong_t alloc_count;
2298 void *outbuf;
2299 struct iovec *iovp;
2300 int error = 0;
2301 size_t reclen;
2302 offset_t diroff;
2303 offset_t soff;
2304 int this_reclen;
2305 int (*vtor)(struct sdev_node *) = NULL;
2306 struct vattr attr;
2307 timestruc_t now;
2308
2309 ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
2310 ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2311
2312 if (uiop->uio_loffset >= MAXOFF_T) {
2313 if (eofp)
2314 *eofp = 1;
2315 return (0);
2316 }
2317
2318 if (uiop->uio_iovcnt != 1)
2319 return (EINVAL);
2320
2321 if (vp->v_type != VDIR)
2322 return (ENOTDIR);
2323
2324 if (ddv->sdev_flags & SDEV_VTOR) {
2325 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2326 ASSERT(vtor);
2327 }
2328
2329 if (eofp != NULL)
2330 *eofp = 0;
2331
2332 soff = uiop->uio_loffset;
2333 iovp = uiop->uio_iov;
2334 alloc_count = iovp->iov_len;
2335 dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
2336 outcount = 0;
2337
2338 if (ddv->sdev_state == SDEV_ZOMBIE)
2339 goto get_cache;
2340
2341 if (SDEV_IS_GLOBAL(ddv)) {
2342
2343 if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
2344 !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
2345 !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
2346 ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
2347 !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
2348 !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2349 !sdev_reconfig_disable) {
2350 /*
2351 * invoking "devfsadm" to do system device reconfig
2352 */
2353 mutex_enter(&ddv->sdev_lookup_lock);
2354 SDEV_BLOCK_OTHERS(ddv,
2355 (SDEV_READDIR|SDEV_LGWAITING));
2356 mutex_exit(&ddv->sdev_lookup_lock);
2357
2358 sdcmn_err8(("readdir of %s by %s: reconfig\n",
2359 ddv->sdev_path, curproc->p_user.u_comm));
2360 if (sdev_reconfig_verbose) {
2361 cmn_err(CE_CONT,
2362 "?readdir of %s by %s: reconfig\n",
2363 ddv->sdev_path, curproc->p_user.u_comm);
2364 }
2365
2366 sdev_devfsadmd_thread(ddv, NULL, kcred);
2367 } else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
2368 /*
2369 * compensate the "ls" started later than "devfsadm"
2370 */
2371 mutex_enter(&ddv->sdev_lookup_lock);
2372 SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
2373 mutex_exit(&ddv->sdev_lookup_lock);
2374 }
2375
2376 /*
2377 * release the contents lock so that
2378 * the cache may be updated by devfsadmd
2379 */
2380 rw_exit(&ddv->sdev_contents);
2381 mutex_enter(&ddv->sdev_lookup_lock);
2382 if (SDEV_IS_READDIR(ddv))
2383 (void) sdev_wait4lookup(ddv, SDEV_READDIR);
2384 mutex_exit(&ddv->sdev_lookup_lock);
2385 rw_enter(&ddv->sdev_contents, RW_READER);
2386
2387 sdcmn_err4(("readdir of directory %s by %s\n",
2388 ddv->sdev_name, curproc->p_user.u_comm));
2389 if (ddv->sdev_flags & SDEV_BUILD) {
2390 if (SDEV_IS_PERSIST(ddv)) {
2391 error = sdev_filldir_from_store(ddv,
2392 alloc_count, cred);
2393 }
2394 ddv->sdev_flags &= ~SDEV_BUILD;
2395 }
2396 }
2397
2398 get_cache:
2399 /* handle "." and ".." */
2400 diroff = 0;
2401 if (soff == 0) {
2402 /* first time */
2403 this_reclen = DIRENT64_RECLEN(1);
2404 if (alloc_count < this_reclen) {
2405 error = EINVAL;
2406 goto done;
2407 }
2408
2409 dp->d_ino = (ino64_t)ddv->sdev_ino;
2410 dp->d_off = (off64_t)1;
2411 dp->d_reclen = (ushort_t)this_reclen;
2412
2413 (void) strncpy(dp->d_name, ".",
2414 DIRENT64_NAMELEN(this_reclen));
2415 outcount += dp->d_reclen;
2416 dp = nextdp(dp);
2417 }
2418
2419 diroff++;
2420 if (soff <= 1) {
2421 this_reclen = DIRENT64_RECLEN(2);
2422 if (alloc_count < outcount + this_reclen) {
2423 error = EINVAL;
2424 goto done;
2425 }
2426
2427 dp->d_reclen = (ushort_t)this_reclen;
2428 dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
2429 dp->d_off = (off64_t)2;
2430
2431 (void) strncpy(dp->d_name, "..",
2432 DIRENT64_NAMELEN(this_reclen));
2433 outcount += dp->d_reclen;
2434
2435 dp = nextdp(dp);
2436 }
2437
2438
2439 /* gets the cache */
2440 diroff++;
2441 for (dv = SDEV_FIRST_ENTRY(ddv); dv;
2442 dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) {
2443 sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
2444 diroff, soff, dv->sdev_name));
2445
2446 /* bypassing pre-matured nodes */
2447 if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
2448 sdcmn_err3(("sdev_readdir: pre-mature node "
2449 "%s %d\n", dv->sdev_name, dv->sdev_state));
2450 continue;
2451 }
2452
2453 /*
2454 * Check validity of node
2455 * Drop invalid and nodes to be skipped.
2456 * A node the validator indicates as stale needs
2457 * to be returned as presumably the node name itself
2458 * is valid and the node data itself will be refreshed
2459 * on lookup. An application performing a readdir then
2460 * stat on each entry should thus always see consistent
2461 * data. In any case, it is not possible to synchronize
2462 * with dynamic kernel state, and any view we return can
2463 * never be anything more than a snapshot at a point in time.
2464 */
2465 if (vtor) {
2466 switch (vtor(dv)) {
2467 case SDEV_VTOR_VALID:
2468 break;
2469 case SDEV_VTOR_INVALID:
2470 case SDEV_VTOR_SKIP:
2471 continue;
2472 case SDEV_VTOR_STALE:
2473 sdcmn_err3(("sdev_readir: %s stale\n",
2474 dv->sdev_name));
2475 break;
2476 default:
2477 cmn_err(CE_PANIC,
2478 "dev fs: validator failed: %s(%p)\n",
2479 dv->sdev_name, (void *)dv);
2480 break;
2481 /*NOTREACHED*/
2482 }
2483 }
2484
2485 namelen = strlen(dv->sdev_name);
2486 reclen = DIRENT64_RECLEN(namelen);
2487 if (outcount + reclen > alloc_count) {
2488 goto full;
2489 }
2490 dp->d_reclen = (ushort_t)reclen;
2491 dp->d_ino = (ino64_t)dv->sdev_ino;
2492 dp->d_off = (off64_t)diroff + 1;
2493 (void) strncpy(dp->d_name, dv->sdev_name,
2494 DIRENT64_NAMELEN(reclen));
2495 outcount += reclen;
2496 dp = nextdp(dp);
2497 }
2498
2499 full:
2500 sdcmn_err4(("sdev_readdir: moving %lu bytes: "
2501 "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
2502 (void *)dv));
2503
2504 if (outcount)
2505 error = uiomove(outbuf, outcount, UIO_READ, uiop);
2506
2507 if (!error) {
2508 uiop->uio_loffset = diroff;
2509 if (eofp)
2510 *eofp = dv ? 0 : 1;
2511 }
2512
2513
2514 if (ddv->sdev_attrvp) {
2515 gethrestime(&now);
2516 attr.va_ctime = now;
2517 attr.va_atime = now;
2518 attr.va_mask = AT_CTIME|AT_ATIME;
2519
2520 (void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
2521 }
2522 done:
2523 kmem_free(outbuf, alloc_count);
2524 return (error);
2525 }
2526
2527 static int
2528 sdev_modctl_lookup(const char *path, vnode_t **r_vp)
2529 {
2530 vnode_t *vp;
2531 vnode_t *cvp;
2532 struct sdev_node *svp;
2533 char *nm;
2534 struct pathname pn;
2535 int error;
2536 int persisted = 0;
2537
2538 ASSERT(INGLOBALZONE(curproc));
2539
2540 if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
2541 return (error);
2542 nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2543
2544 vp = rootdir;
2545 VN_HOLD(vp);
2546
2547 while (pn_pathleft(&pn)) {
2548 ASSERT(vp->v_type == VDIR || vp->v_type == VLNK);
2549 (void) pn_getcomponent(&pn, nm);
2550
2551 /*
2552 * Deal with the .. special case where we may be
2553 * traversing up across a mount point, to the
2554 * root of this filesystem or global root.
2555 */
2556 if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) {
2557 checkforroot:
2558 if (VN_CMP(vp, rootdir)) {
2559 nm[1] = 0;
2560 } else if (vp->v_flag & VROOT) {
2561 vfs_t *vfsp;
2562 cvp = vp;
2563 vfsp = cvp->v_vfsp;
2564 vfs_rlock_wait(vfsp);
2565 vp = cvp->v_vfsp->vfs_vnodecovered;
2566 if (vp == NULL ||
2567 (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
2568 vfs_unlock(vfsp);
2569 VN_RELE(cvp);
2570 error = EIO;
2571 break;
2572 }
2573 VN_HOLD(vp);
2574 vfs_unlock(vfsp);
2575 VN_RELE(cvp);
2576 cvp = NULL;
2577 goto checkforroot;
2578 }
2579 }
2580
2581 error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL,
2582 NULL, NULL);
2583 if (error) {
2584 VN_RELE(vp);
2585 break;
2586 }
2587
2588 /* traverse mount points encountered on our journey */
2589 if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
2590 VN_RELE(vp);
2591 VN_RELE(cvp);
2592 break;
2593 }
2594
2595 /*
2596 * symbolic link, can be either relative and absolute
2597 */
2598 if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) {
2599 struct pathname linkpath;
2600 pn_alloc(&linkpath);
2601 if (error = pn_getsymlink(cvp, &linkpath, kcred)) {
2602 pn_free(&linkpath);
2603 break;
2604 }
2605 if (pn_pathleft(&linkpath) == 0)
2606 (void) pn_set(&linkpath, ".");
2607 error = pn_insert(&pn, &linkpath, strlen(nm));
2608 pn_free(&linkpath);
2609 if (pn.pn_pathlen == 0) {
2610 VN_RELE(vp);
2611 return (ENOENT);
2612 }
2613 if (pn.pn_path[0] == '/') {
2614 pn_skipslash(&pn);
2615 VN_RELE(vp);
2616 VN_RELE(cvp);
2617 vp = rootdir;
2618 VN_HOLD(vp);
2619 } else {
2620 VN_RELE(cvp);
2621 }
2622 continue;
2623 }
2624
2625 VN_RELE(vp);
2626
2627 /*
2628 * Direct the operation to the persisting filesystem
2629 * underlying /dev. Bail if we encounter a
2630 * non-persistent dev entity here.
2631 */
2632 if (cvp->v_vfsp->vfs_fstype == devtype) {
2633
2634 if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) {
2635 error = ENOENT;
2636 VN_RELE(cvp);
2637 break;
2638 }
2639
2640 if (VTOSDEV(cvp) == NULL) {
2641 error = ENOENT;
2642 VN_RELE(cvp);
2643 break;
2644 }
2645 svp = VTOSDEV(cvp);
2646 if ((vp = svp->sdev_attrvp) == NULL) {
2647 error = ENOENT;
2648 VN_RELE(cvp);
2649 break;
2650 }
2651 persisted = 1;
2652 VN_HOLD(vp);
2653 VN_RELE(cvp);
2654 cvp = vp;
2655 }
2656
2657 vp = cvp;
2658 pn_skipslash(&pn);
2659 }
2660
2661 kmem_free(nm, MAXNAMELEN);
2662 pn_free(&pn);
2663
2664 if (error)
2665 return (error);
2666
2667 /*
2668 * Only return persisted nodes in the filesystem underlying /dev.
2669 */
2670 if (!persisted) {
2671 VN_RELE(vp);
2672 return (ENOENT);
2673 }
2674
2675 *r_vp = vp;
2676 return (0);
2677 }
2678
2679 int
2680 sdev_modctl_readdir(const char *dir, char ***dirlistp,
2681 int *npathsp, int *npathsp_alloc, int checking_empty)
2682 {
2683 char **pathlist = NULL;
2684 char **newlist = NULL;
2685 int npaths = 0;
2686 int npaths_alloc = 0;
2687 dirent64_t *dbuf = NULL;
2688 int n;
2689 char *s;
2690 int error;
2691 vnode_t *vp;
2692 int eof;
2693 struct iovec iov;
2694 struct uio uio;
2695 struct dirent64 *dp;
2696 size_t dlen;
2697 size_t dbuflen;
2698 int ndirents = 64;
2699 char *nm;
2700
2701 error = sdev_modctl_lookup(dir, &vp);
2702 sdcmn_err11(("modctl readdir: %s by %s: %s\n",
2703 dir, curproc->p_user.u_comm,
2704 (error == 0) ? "ok" : "failed"));
2705 if (error)
2706 return (error);
2707
2708 dlen = ndirents * (sizeof (*dbuf));
2709 dbuf = kmem_alloc(dlen, KM_SLEEP);
2710
2711 uio.uio_iov = &iov;
2712 uio.uio_iovcnt = 1;
2713 uio.uio_segflg = UIO_SYSSPACE;
2714 uio.uio_fmode = 0;
2715 uio.uio_extflg = UIO_COPY_CACHED;
2716 uio.uio_loffset = 0;
2717 uio.uio_llimit = MAXOFFSET_T;
2718
2719 eof = 0;
2720 error = 0;
2721 while (!error && !eof) {
2722 uio.uio_resid = dlen;
2723 iov.iov_base = (char *)dbuf;
2724 iov.iov_len = dlen;
2725
2726 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2727 error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
2728 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2729
2730 dbuflen = dlen - uio.uio_resid;
2731
2732 if (error || dbuflen == 0)
2733 break;
2734
2735 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
2736 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
2737
2738 nm = dp->d_name;
2739
2740 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
2741 continue;
2742 if (npaths == npaths_alloc) {
2743 npaths_alloc += 64;
2744 newlist = (char **)
2745 kmem_zalloc((npaths_alloc + 1) *
2746 sizeof (char *), KM_SLEEP);
2747 if (pathlist) {
2748 bcopy(pathlist, newlist,
2749 npaths * sizeof (char *));
2750 kmem_free(pathlist,
2751 (npaths + 1) * sizeof (char *));
2752 }
2753 pathlist = newlist;
2754 }
2755 n = strlen(nm) + 1;
2756 s = kmem_alloc(n, KM_SLEEP);
2757 bcopy(nm, s, n);
2758 pathlist[npaths++] = s;
2759 sdcmn_err11((" %s/%s\n", dir, s));
2760
2761 /* if checking empty, one entry is as good as many */
2762 if (checking_empty) {
2763 eof = 1;
2764 break;
2765 }
2766 }
2767 }
2768
2769 exit:
2770 VN_RELE(vp);
2771
2772 if (dbuf)
2773 kmem_free(dbuf, dlen);
2774
2775 if (error)
2776 return (error);
2777
2778 *dirlistp = pathlist;
2779 *npathsp = npaths;
2780 *npathsp_alloc = npaths_alloc;
2781
2782 return (0);
2783 }
2784
/*
 * Free a name list produced by sdev_modctl_readdir().
 *
 * pathlist	array of kmem-allocated, NUL-terminated strings
 * npaths	number of strings stored in the array
 * npaths_alloc	slot count the array was sized for; the array itself
 *		is (npaths_alloc + 1) pointers long
 *
 * A NULL pathlist is accepted and ignored: sdev_modctl_readdir() hands
 * back NULL when it collected no entries, and there is then nothing to
 * pass to kmem_free().
 */
void
sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
{
	int	i;

	if (pathlist == NULL)
		return;

	/* each string was allocated with exactly strlen + 1 bytes */
	for (i = 0; i < npaths; i++)
		kmem_free(pathlist[i], strlen(pathlist[i]) + 1);

	kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
}
2797
2798 int
2799 sdev_modctl_devexists(const char *path)
2800 {
2801 vnode_t *vp;
2802 int error;
2803
2804 error = sdev_modctl_lookup(path, &vp);
2805 sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
2806 path, curproc->p_user.u_comm,
2807 (error == 0) ? "ok" : "failed"));
2808 if (error == 0)
2809 VN_RELE(vp);
2810
2811 return (error);
2812 }
2813
2814 /*
2815 * a generic setattr() function
2816 *
2817 * note: flags only supports AT_UID and AT_GID.
2818 * Future enhancements can be done for other types, e.g. AT_MODE
2819 */
2820 int
2821 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags,
2822 struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *,
2823 int), int protocol)
2824 {
2825 struct sdev_node *dv = VTOSDEV(vp);
2826 struct sdev_node *parent = dv->sdev_dotdot;
2827 struct vattr *get;
2828 uint_t mask = vap->va_mask;
2829 int error;
2830
2831 /* some sanity checks */
2832 if (vap->va_mask & AT_NOSET)
2833 return (EINVAL);
2834
2835 if (vap->va_mask & AT_SIZE) {
2836 if (vp->v_type == VDIR) {
2837 return (EISDIR);
2838 }
2839 }
2840
2841 /* no need to set attribute, but do not fail either */
2842 ASSERT(parent);
2843 rw_enter(&parent->sdev_contents, RW_READER);
2844 if (dv->sdev_state == SDEV_ZOMBIE) {
2845 rw_exit(&parent->sdev_contents);
2846 return (0);
2847 }
2848
2849 /* If backing store exists, just set it. */
2850 if (dv->sdev_attrvp) {
2851 rw_exit(&parent->sdev_contents);
2852 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
2853 }
2854
2855 /*
2856 * Otherwise, for nodes with the persistence attribute, create it.
2857 */
2858 ASSERT(dv->sdev_attr);
2859 if (SDEV_IS_PERSIST(dv) ||
2860 ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
2861 sdev_vattr_merge(dv, vap);
2862 rw_enter(&dv->sdev_contents, RW_WRITER);
2863 error = sdev_shadow_node(dv, cred);
2864 rw_exit(&dv->sdev_contents);
2865 rw_exit(&parent->sdev_contents);
2866
2867 if (error)
2868 return (error);
2869 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
2870 }
2871
2872
2873 /*
2874 * sdev_attr was allocated in sdev_mknode
2875 */
2876 rw_enter(&dv->sdev_contents, RW_WRITER);
2877 error = secpolicy_vnode_setattr(cred, vp, vap,
2878 dv->sdev_attr, flags, sdev_unlocked_access, dv);
2879 if (error) {
2880 rw_exit(&dv->sdev_contents);
2881 rw_exit(&parent->sdev_contents);
2882 return (error);
2883 }
2884
2885 get = dv->sdev_attr;
2886 if (mask & AT_MODE) {
2887 get->va_mode &= S_IFMT;
2888 get->va_mode |= vap->va_mode & ~S_IFMT;
2889 }
2890
2891 if ((mask & AT_UID) || (mask & AT_GID)) {
2892 if (mask & AT_UID)
2893 get->va_uid = vap->va_uid;
2894 if (mask & AT_GID)
2895 get->va_gid = vap->va_gid;
2896 /*
2897 * a callback must be provided if the protocol is set
2898 */
2899 if ((protocol & AT_UID) || (protocol & AT_GID)) {
2900 ASSERT(callback);
2901 error = callback(dv, get, protocol);
2902 if (error) {
2903 rw_exit(&dv->sdev_contents);
2904 rw_exit(&parent->sdev_contents);
2905 return (error);
2906 }
2907 }
2908 }
2909
2910 if (mask & AT_ATIME)
2911 get->va_atime = vap->va_atime;
2912 if (mask & AT_MTIME)
2913 get->va_mtime = vap->va_mtime;
2914 if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
2915 gethrestime(&get->va_ctime);
2916 }
2917
2918 sdev_vattr_merge(dv, get);
2919 rw_exit(&dv->sdev_contents);
2920 rw_exit(&parent->sdev_contents);
2921 return (0);
2922 }
2923
2924 /*
2925 * a generic inactive() function
2926 */
2927 /*ARGSUSED*/
2928 void
2929 devname_inactive_func(struct vnode *vp, struct cred *cred,
2930 void (*callback)(struct vnode *))
2931 {
2932 int clean;
2933 struct sdev_node *dv = VTOSDEV(vp);
2934 int state;
2935
2936 mutex_enter(&vp->v_lock);
2937 ASSERT(vp->v_count >= 1);
2938
2939
2940 if (vp->v_count == 1 && callback != NULL)
2941 callback(vp);
2942
2943 rw_enter(&dv->sdev_contents, RW_WRITER);
2944 state = dv->sdev_state;
2945
2946 clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE);
2947
2948 /*
2949 * sdev is a rather bad public citizen. It violates the general
2950 * agreement that in memory nodes should always have a valid reference
2951 * count on their vnode. But that's not the case here. This means that
2952 * we do actually have to distinguish between getting inactive callbacks
2953 * for zombies and otherwise. This should probably be fixed.
2954 */
2955 if (clean) {
2956 /* Remove the . entry to ourselves */
2957 if (vp->v_type == VDIR) {
2958 decr_link(dv);
2959 }
2960 VERIFY(dv->sdev_nlink == 1);
2961 decr_link(dv);
2962 --vp->v_count;
2963 rw_exit(&dv->sdev_contents);
2964 mutex_exit(&vp->v_lock);
2965 sdev_nodedestroy(dv, 0);
2966 } else {
2967 --vp->v_count;
2968 rw_exit(&dv->sdev_contents);
2969 mutex_exit(&vp->v_lock);
2970 }
2971 }