1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2013, 2016 Joyent, Inc. All rights reserved.
24 */
25
26 /*
27 * utility routines for the /dev fs
28 */
29
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/t_lock.h>
33 #include <sys/systm.h>
34 #include <sys/sysmacros.h>
35 #include <sys/user.h>
36 #include <sys/time.h>
37 #include <sys/vfs.h>
38 #include <sys/vnode.h>
39 #include <sys/file.h>
40 #include <sys/fcntl.h>
41 #include <sys/flock.h>
42 #include <sys/kmem.h>
43 #include <sys/uio.h>
44 #include <sys/errno.h>
45 #include <sys/stat.h>
46 #include <sys/cred.h>
47 #include <sys/dirent.h>
48 #include <sys/pathname.h>
49 #include <sys/cmn_err.h>
50 #include <sys/debug.h>
51 #include <sys/mode.h>
52 #include <sys/policy.h>
53 #include <fs/fs_subr.h>
54 #include <sys/mount.h>
55 #include <sys/fs/snode.h>
56 #include <sys/fs/dv_node.h>
57 #include <sys/fs/sdev_impl.h>
58 #include <sys/sunndi.h>
59 #include <sys/sunmdi.h>
60 #include <sys/conf.h>
61 #include <sys/proc.h>
62 #include <sys/user.h>
63 #include <sys/modctl.h>
64
#ifdef DEBUG
/*
 * Debug-only tunables.
 *
 * sdev_debug: message mask, presumably consulted by the sdcmn_err*()
 * macros (their definition is not in this file).
 * sdev_debug_cache_flags: passed as the creation flags for the sdev_node
 * kmem cache in sdev_node_cache_init().
 */
int sdev_debug = 0x1;
int sdev_debug_cache_flags = 0;
#endif	/* DEBUG */
69
70 /*
71 * globals
72 */
73 /* prototype memory vattrs */
74 vattr_t sdev_vattr_dir = {
75 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */
76 VDIR, /* va_type */
77 SDEV_DIRMODE_DEFAULT, /* va_mode */
78 SDEV_UID_DEFAULT, /* va_uid */
79 SDEV_GID_DEFAULT, /* va_gid */
80 0, /* va_fsid */
81 0, /* va_nodeid */
82 0, /* va_nlink */
83 0, /* va_size */
84 0, /* va_atime */
85 0, /* va_mtime */
86 0, /* va_ctime */
87 0, /* va_rdev */
88 0, /* va_blksize */
89 0, /* va_nblocks */
90 0 /* va_vcode */
91 };
92
93 vattr_t sdev_vattr_lnk = {
94 AT_TYPE|AT_MODE, /* va_mask */
95 VLNK, /* va_type */
96 SDEV_LNKMODE_DEFAULT, /* va_mode */
97 SDEV_UID_DEFAULT, /* va_uid */
98 SDEV_GID_DEFAULT, /* va_gid */
99 0, /* va_fsid */
100 0, /* va_nodeid */
101 0, /* va_nlink */
102 0, /* va_size */
103 0, /* va_atime */
104 0, /* va_mtime */
105 0, /* va_ctime */
106 0, /* va_rdev */
107 0, /* va_blksize */
108 0, /* va_nblocks */
109 0 /* va_vcode */
110 };
111
112 vattr_t sdev_vattr_blk = {
113 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */
114 VBLK, /* va_type */
115 S_IFBLK | SDEV_DEVMODE_DEFAULT, /* va_mode */
116 SDEV_UID_DEFAULT, /* va_uid */
117 SDEV_GID_DEFAULT, /* va_gid */
118 0, /* va_fsid */
119 0, /* va_nodeid */
120 0, /* va_nlink */
121 0, /* va_size */
122 0, /* va_atime */
123 0, /* va_mtime */
124 0, /* va_ctime */
125 0, /* va_rdev */
126 0, /* va_blksize */
127 0, /* va_nblocks */
128 0 /* va_vcode */
129 };
130
131 vattr_t sdev_vattr_chr = {
132 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */
133 VCHR, /* va_type */
134 S_IFCHR | SDEV_DEVMODE_DEFAULT, /* va_mode */
135 SDEV_UID_DEFAULT, /* va_uid */
136 SDEV_GID_DEFAULT, /* va_gid */
137 0, /* va_fsid */
138 0, /* va_nodeid */
139 0, /* va_nlink */
140 0, /* va_size */
141 0, /* va_atime */
142 0, /* va_mtime */
143 0, /* va_ctime */
144 0, /* va_rdev */
145 0, /* va_blksize */
146 0, /* va_nblocks */
147 0 /* va_vcode */
148 };
149
150 kmem_cache_t *sdev_node_cache; /* sdev_node cache */
151 int devtype; /* fstype */
152
153 /* static */
154 static struct vnodeops *sdev_get_vop(struct sdev_node *);
155 static void sdev_set_no_negcache(struct sdev_node *);
156 static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []);
157 static void sdev_free_vtab(fs_operation_def_t *);
158
159 static void
160 sdev_prof_free(struct sdev_node *dv)
161 {
162 ASSERT(!SDEV_IS_GLOBAL(dv));
163 nvlist_free(dv->sdev_prof.dev_name);
164 nvlist_free(dv->sdev_prof.dev_map);
165 nvlist_free(dv->sdev_prof.dev_symlink);
166 nvlist_free(dv->sdev_prof.dev_glob_incdir);
167 nvlist_free(dv->sdev_prof.dev_glob_excdir);
168 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
169 }
170
171 /* sdev_node cache constructor */
172 /*ARGSUSED1*/
173 static int
174 i_sdev_node_ctor(void *buf, void *cfarg, int flag)
175 {
176 struct sdev_node *dv = (struct sdev_node *)buf;
177 struct vnode *vp;
178
179 bzero(buf, sizeof (struct sdev_node));
180 vp = dv->sdev_vnode = vn_alloc(flag);
181 if (vp == NULL) {
182 return (-1);
183 }
184 vp->v_data = dv;
185 rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
186 return (0);
187 }
188
189 /* sdev_node cache destructor */
190 /*ARGSUSED1*/
191 static void
192 i_sdev_node_dtor(void *buf, void *arg)
193 {
194 struct sdev_node *dv = (struct sdev_node *)buf;
195 struct vnode *vp = SDEVTOV(dv);
196
197 rw_destroy(&dv->sdev_contents);
198 vn_free(vp);
199 }
200
201 /* initialize sdev_node cache */
202 void
203 sdev_node_cache_init()
204 {
205 int flags = 0;
206
207 #ifdef DEBUG
208 flags = sdev_debug_cache_flags;
209 if (flags)
210 sdcmn_err(("cache debug flags 0x%x\n", flags));
211 #endif /* DEBUG */
212
213 ASSERT(sdev_node_cache == NULL);
214 sdev_node_cache = kmem_cache_create("sdev_node_cache",
215 sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
216 NULL, NULL, NULL, flags);
217 }
218
219 /* destroy sdev_node cache */
220 void
221 sdev_node_cache_fini()
222 {
223 ASSERT(sdev_node_cache != NULL);
224 kmem_cache_destroy(sdev_node_cache);
225 sdev_node_cache = NULL;
226 }
227
228 /*
229 * Compare two nodes lexographically to balance avl tree
230 */
231 static int
232 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2)
233 {
234 int rv;
235 if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0)
236 return (0);
237 return ((rv < 0) ? -1 : 1);
238 }
239
240 void
241 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
242 {
243 ASSERT(dv);
244 ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
245 dv->sdev_state = state;
246 }
247
248 static void
249 sdev_attr_update(struct sdev_node *dv, vattr_t *vap)
250 {
251 timestruc_t now;
252 struct vattr *attrp;
253 uint_t mask;
254
255 ASSERT(dv->sdev_attr);
256 ASSERT(vap);
257
258 attrp = dv->sdev_attr;
259 mask = vap->va_mask;
260 if (mask & AT_TYPE)
261 attrp->va_type = vap->va_type;
262 if (mask & AT_MODE)
263 attrp->va_mode = vap->va_mode;
264 if (mask & AT_UID)
265 attrp->va_uid = vap->va_uid;
266 if (mask & AT_GID)
267 attrp->va_gid = vap->va_gid;
268 if (mask & AT_RDEV)
269 attrp->va_rdev = vap->va_rdev;
270
271 gethrestime(&now);
272 attrp->va_atime = (mask & AT_ATIME) ? vap->va_atime : now;
273 attrp->va_mtime = (mask & AT_MTIME) ? vap->va_mtime : now;
274 attrp->va_ctime = (mask & AT_CTIME) ? vap->va_ctime : now;
275 }
276
277 static void
278 sdev_attr_alloc(struct sdev_node *dv, vattr_t *vap)
279 {
280 ASSERT(dv->sdev_attr == NULL);
281 ASSERT(vap->va_mask & AT_TYPE);
282 ASSERT(vap->va_mask & AT_MODE);
283
284 dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
285 sdev_attr_update(dv, vap);
286 }
287
288 /* alloc and initialize a sdev_node */
289 int
290 sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
291 vattr_t *vap)
292 {
293 struct sdev_node *dv = NULL;
294 struct vnode *vp;
295 size_t nmlen, len;
296 devname_handle_t *dhl;
297
298 nmlen = strlen(nm) + 1;
299 if (nmlen > MAXNAMELEN) {
300 sdcmn_err9(("sdev_nodeinit: node name %s"
301 " too long\n", nm));
302 *newdv = NULL;
303 return (ENAMETOOLONG);
304 }
305
306 dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
307
308 dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
309 bcopy(nm, dv->sdev_name, nmlen);
310 dv->sdev_namelen = nmlen - 1; /* '\0' not included */
311 len = strlen(ddv->sdev_path) + strlen(nm) + 2;
312 dv->sdev_path = kmem_alloc(len, KM_SLEEP);
313 (void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
314 /* overwritten for VLNK nodes */
315 dv->sdev_symlink = NULL;
316
317 vp = SDEVTOV(dv);
318 vn_reinit(vp);
319 vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
320 if (vap)
321 vp->v_type = vap->va_type;
322
323 /*
324 * initialized to the parent's vnodeops.
325 * maybe overwriten for a VDIR
326 */
327 vn_setops(vp, vn_getops(SDEVTOV(ddv)));
328 vn_exists(vp);
329
330 dv->sdev_dotdot = NULL;
331 dv->sdev_attrvp = NULL;
332 if (vap) {
333 sdev_attr_alloc(dv, vap);
334 } else {
335 dv->sdev_attr = NULL;
336 }
337
338 dv->sdev_ino = sdev_mkino(dv);
339 dv->sdev_nlink = 0; /* updated on insert */
340 dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
341 dv->sdev_flags |= SDEV_BUILD;
342 mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
343 cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
344 if (SDEV_IS_GLOBAL(ddv)) {
345 dv->sdev_flags |= SDEV_GLOBAL;
346 dhl = &(dv->sdev_handle);
347 dhl->dh_data = dv;
348 dhl->dh_args = NULL;
349 sdev_set_no_negcache(dv);
350 dv->sdev_gdir_gen = 0;
351 } else {
352 dv->sdev_flags &= ~SDEV_GLOBAL;
353 dv->sdev_origin = NULL; /* set later */
354 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
355 dv->sdev_ldir_gen = 0;
356 dv->sdev_devtree_gen = 0;
357 }
358
359 rw_enter(&dv->sdev_contents, RW_WRITER);
360 sdev_set_nodestate(dv, SDEV_INIT);
361 rw_exit(&dv->sdev_contents);
362 *newdv = dv;
363
364 return (0);
365 }
366
367 /*
368 * Transition a sdev_node into SDEV_READY state. If this fails, it is up to the
369 * caller to transition the node to the SDEV_ZOMBIE state.
370 */
371 int
372 sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
373 void *args, struct cred *cred)
374 {
375 int error = 0;
376 struct vnode *vp = SDEVTOV(dv);
377 vtype_t type;
378
379 ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);
380
381 type = vap->va_type;
382 vp->v_type = type;
383 vp->v_rdev = vap->va_rdev;
384 rw_enter(&dv->sdev_contents, RW_WRITER);
385 if (type == VDIR) {
386 dv->sdev_nlink = 2;
387 dv->sdev_flags &= ~SDEV_PERSIST;
388 dv->sdev_flags &= ~SDEV_DYNAMIC;
389 vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
390 ASSERT(dv->sdev_dotdot);
391 ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
392 vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
393 avl_create(&dv->sdev_entries,
394 (int (*)(const void *, const void *))sdev_compare_nodes,
395 sizeof (struct sdev_node),
396 offsetof(struct sdev_node, sdev_avllink));
397 } else if (type == VLNK) {
398 ASSERT(args);
399 dv->sdev_nlink = 1;
400 dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
401 } else {
402 dv->sdev_nlink = 1;
403 }
404
405 if (!(SDEV_IS_GLOBAL(dv))) {
406 dv->sdev_origin = (struct sdev_node *)args;
407 dv->sdev_flags &= ~SDEV_PERSIST;
408 }
409
410 /*
411 * shadow node is created here OR
412 * if failed (indicated by dv->sdev_attrvp == NULL),
413 * created later in sdev_setattr
414 */
415 if (avp) {
416 dv->sdev_attrvp = avp;
417 } else {
418 if (dv->sdev_attr == NULL) {
419 sdev_attr_alloc(dv, vap);
420 } else {
421 sdev_attr_update(dv, vap);
422 }
423
424 if ((dv->sdev_attrvp == NULL) && SDEV_IS_PERSIST(dv))
425 error = sdev_shadow_node(dv, cred);
426 }
427
428 if (error == 0) {
429 /* transition to READY state */
430 sdev_set_nodestate(dv, SDEV_READY);
431 sdev_nc_node_exists(dv);
432 }
433 rw_exit(&dv->sdev_contents);
434 return (error);
435 }
436
437 /*
438 * Build the VROOT sdev_node.
439 */
440 /*ARGSUSED*/
441 struct sdev_node *
442 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
443 struct vnode *avp, struct cred *cred)
444 {
445 struct sdev_node *dv;
446 struct vnode *vp;
447 char devdir[] = "/dev";
448
449 ASSERT(sdev_node_cache != NULL);
450 ASSERT(avp);
451 dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
452 vp = SDEVTOV(dv);
453 vn_reinit(vp);
454 vp->v_flag |= VROOT;
455 vp->v_vfsp = vfsp;
456 vp->v_type = VDIR;
457 vp->v_rdev = devdev;
458 vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
459 vn_exists(vp);
460
461 if (vfsp->vfs_mntpt)
462 dv->sdev_name = i_ddi_strdup(
463 (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
464 else
465 /* vfs_mountdev1 set mount point later */
466 dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
467 dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
468 dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
469 dv->sdev_ino = SDEV_ROOTINO;
470 dv->sdev_nlink = 2; /* name + . (no sdev_insert) */
471 dv->sdev_dotdot = dv; /* .. == self */
472 dv->sdev_attrvp = avp;
473 dv->sdev_attr = NULL;
474 mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
475 cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
476 if (strcmp(dv->sdev_name, "/dev") == 0) {
477 dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
478 bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
479 dv->sdev_gdir_gen = 0;
480 } else {
481 dv->sdev_flags = SDEV_BUILD;
482 dv->sdev_flags &= ~SDEV_PERSIST;
483 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
484 dv->sdev_ldir_gen = 0;
485 dv->sdev_devtree_gen = 0;
486 }
487
488 avl_create(&dv->sdev_entries,
489 (int (*)(const void *, const void *))sdev_compare_nodes,
490 sizeof (struct sdev_node),
491 offsetof(struct sdev_node, sdev_avllink));
492
493 rw_enter(&dv->sdev_contents, RW_WRITER);
494 sdev_set_nodestate(dv, SDEV_READY);
495 rw_exit(&dv->sdev_contents);
496 sdev_nc_node_exists(dv);
497 return (dv);
498 }
499
500 /* directory dependent vop table */
501 struct sdev_vop_table {
502 char *vt_name; /* subdirectory name */
503 const fs_operation_def_t *vt_service; /* vnodeops table */
504 struct vnodeops *vt_vops; /* constructed vop */
505 struct vnodeops **vt_global_vops; /* global container for vop */
506 int (*vt_vtor)(struct sdev_node *); /* validate sdev_node */
507 int vt_flags;
508 };
509
510 /*
511 * A nice improvement would be to provide a plug-in mechanism
512 * for this table instead of a const table.
513 */
514 static struct sdev_vop_table vtab[] =
515 {
516 { "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate,
517 SDEV_DYNAMIC | SDEV_VTOR },
518
519 { "vt", devvt_vnodeops_tbl, NULL, &devvt_vnodeops, devvt_validate,
520 SDEV_DYNAMIC | SDEV_VTOR },
521
522 { "zvol", devzvol_vnodeops_tbl, NULL, &devzvol_vnodeops,
523 devzvol_validate, SDEV_ZONED | SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },
524
525 { "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },
526
527 { "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate,
528 SDEV_DYNAMIC | SDEV_VTOR },
529
530 { "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops,
531 devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE },
532
533 /*
534 * SDEV_DYNAMIC: prevent calling out to devfsadm, since only the
535 * lofi driver controls child nodes.
536 *
537 * SDEV_PERSIST: ensure devfsadm knows to clean up any persisted
538 * stale nodes (e.g. from devfsadm -R).
539 *
540 * In addition, devfsadm knows not to attempt a rmdir: a zone
541 * may hold a reference, which would zombify the node,
542 * preventing a mkdir.
543 */
544
545 { "lofi", NULL, NULL, NULL, NULL,
546 SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
547 { "rlofi", NULL, NULL, NULL, NULL,
548 SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
549
550 { NULL, NULL, NULL, NULL, NULL, 0}
551 };
552
553 /*
554 * We need to match off of the sdev_path, not the sdev_name. We are only allowed
555 * to exist directly under /dev.
556 */
557 struct sdev_vop_table *
558 sdev_match(struct sdev_node *dv)
559 {
560 int vlen;
561 int i;
562 const char *path;
563
564 if (strlen(dv->sdev_path) <= 5)
565 return (NULL);
566
567 if (strncmp(dv->sdev_path, "/dev/", 5) != 0)
568 return (NULL);
569 path = dv->sdev_path + 5;
570
571 for (i = 0; vtab[i].vt_name; i++) {
572 if (strcmp(vtab[i].vt_name, path) == 0)
573 return (&vtab[i]);
574 if (vtab[i].vt_flags & SDEV_SUBDIR) {
575 vlen = strlen(vtab[i].vt_name);
576 if ((strncmp(vtab[i].vt_name, path,
577 vlen - 1) == 0) && path[vlen] == '/')
578 return (&vtab[i]);
579 }
580
581 }
582 return (NULL);
583 }
584
585 /*
586 * sets a directory's vnodeops if the directory is in the vtab;
587 */
588 static struct vnodeops *
589 sdev_get_vop(struct sdev_node *dv)
590 {
591 struct sdev_vop_table *vtp;
592 char *path;
593
594 path = dv->sdev_path;
595 ASSERT(path);
596
597 /* gets the relative path to /dev/ */
598 path += 5;
599
600 /* gets the vtab entry it matches */
601 if ((vtp = sdev_match(dv)) != NULL) {
602 dv->sdev_flags |= vtp->vt_flags;
603 if (SDEV_IS_PERSIST(dv->sdev_dotdot) &&
604 (SDEV_IS_PERSIST(dv) || !SDEV_IS_DYNAMIC(dv)))
605 dv->sdev_flags |= SDEV_PERSIST;
606
607 if (vtp->vt_vops) {
608 if (vtp->vt_global_vops)
609 *(vtp->vt_global_vops) = vtp->vt_vops;
610
611 return (vtp->vt_vops);
612 }
613
614 if (vtp->vt_service) {
615 fs_operation_def_t *templ;
616 templ = sdev_merge_vtab(vtp->vt_service);
617 if (vn_make_ops(vtp->vt_name,
618 (const fs_operation_def_t *)templ,
619 &vtp->vt_vops) != 0) {
620 cmn_err(CE_PANIC, "%s: malformed vnode ops\n",
621 vtp->vt_name);
622 /*NOTREACHED*/
623 }
624 if (vtp->vt_global_vops) {
625 *(vtp->vt_global_vops) = vtp->vt_vops;
626 }
627 sdev_free_vtab(templ);
628
629 return (vtp->vt_vops);
630 }
631
632 return (sdev_vnodeops);
633 }
634
635 /* child inherits the persistence of the parent */
636 if (SDEV_IS_PERSIST(dv->sdev_dotdot))
637 dv->sdev_flags |= SDEV_PERSIST;
638
639 return (sdev_vnodeops);
640 }
641
642 static void
643 sdev_set_no_negcache(struct sdev_node *dv)
644 {
645 int i;
646 char *path;
647
648 ASSERT(dv->sdev_path);
649 path = dv->sdev_path + strlen("/dev/");
650
651 for (i = 0; vtab[i].vt_name; i++) {
652 if (strcmp(vtab[i].vt_name, path) == 0) {
653 if (vtab[i].vt_flags & SDEV_NO_NCACHE)
654 dv->sdev_flags |= SDEV_NO_NCACHE;
655 break;
656 }
657 }
658 }
659
660 void *
661 sdev_get_vtor(struct sdev_node *dv)
662 {
663 struct sdev_vop_table *vtp;
664
665 vtp = sdev_match(dv);
666 if (vtp)
667 return ((void *)vtp->vt_vtor);
668 else
669 return (NULL);
670 }
671
672 /*
673 * Build the base root inode
674 */
675 ino_t
676 sdev_mkino(struct sdev_node *dv)
677 {
678 ino_t ino;
679
680 /*
681 * for now, follow the lead of tmpfs here
682 * need to someday understand the requirements here
683 */
684 ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
685 ino += SDEV_ROOTINO + 1;
686
687 return (ino);
688 }
689
690 int
691 sdev_getlink(struct vnode *linkvp, char **link)
692 {
693 int err;
694 char *buf;
695 struct uio uio = {0};
696 struct iovec iov = {0};
697
698 if (linkvp == NULL)
699 return (ENOENT);
700 ASSERT(linkvp->v_type == VLNK);
701
702 buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
703 iov.iov_base = buf;
704 iov.iov_len = MAXPATHLEN;
705 uio.uio_iov = &iov;
706 uio.uio_iovcnt = 1;
707 uio.uio_resid = MAXPATHLEN;
708 uio.uio_segflg = UIO_SYSSPACE;
709 uio.uio_llimit = MAXOFFSET_T;
710
711 err = VOP_READLINK(linkvp, &uio, kcred, NULL);
712 if (err) {
713 cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
714 kmem_free(buf, MAXPATHLEN);
715 return (ENOENT);
716 }
717
718 /* mission complete */
719 *link = i_ddi_strdup(buf, KM_SLEEP);
720 kmem_free(buf, MAXPATHLEN);
721 return (0);
722 }
723
724 /*
725 * A convenient wrapper to get the devfs node vnode for a device
726 * minor functionality: readlink() of a /dev symlink
727 * Place the link into dv->sdev_symlink
728 */
729 static int
730 sdev_follow_link(struct sdev_node *dv)
731 {
732 int err;
733 struct vnode *linkvp;
734 char *link = NULL;
735
736 linkvp = SDEVTOV(dv);
737 if (linkvp == NULL)
738 return (ENOENT);
739 ASSERT(linkvp->v_type == VLNK);
740 err = sdev_getlink(linkvp, &link);
741 if (err) {
742 dv->sdev_symlink = NULL;
743 return (ENOENT);
744 }
745
746 ASSERT(link != NULL);
747 dv->sdev_symlink = link;
748 return (0);
749 }
750
751 static int
752 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
753 {
754 vtype_t otype = SDEVTOV(dv)->v_type;
755
756 /*
757 * existing sdev_node has a different type.
758 */
759 if (otype != nvap->va_type) {
760 sdcmn_err9(("sdev_node_check: existing node "
761 " %s type %d does not match new node type %d\n",
762 dv->sdev_name, otype, nvap->va_type));
763 return (EEXIST);
764 }
765
766 /*
767 * For a symlink, the target should be the same.
768 */
769 if (otype == VLNK) {
770 ASSERT(nargs != NULL);
771 ASSERT(dv->sdev_symlink != NULL);
772 if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
773 sdcmn_err9(("sdev_node_check: existing node "
774 " %s has different symlink %s as new node "
775 " %s\n", dv->sdev_name, dv->sdev_symlink,
776 (char *)nargs));
777 return (EEXIST);
778 }
779 }
780
781 return (0);
782 }
783
784 /*
785 * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
786 *
787 * arguments:
788 * - ddv (parent)
789 * - nm (child name)
790 * - newdv (sdev_node for nm is returned here)
791 * - vap (vattr for the node to be created, va_type should be set.
792 * - avp (attribute vnode)
793 * the defaults should be used if unknown)
794 * - cred
795 * - args
796 * . tnm (for VLNK)
797 * . global sdev_node (for !SDEV_GLOBAL)
798 * - state: SDEV_INIT, SDEV_READY
799 *
800 * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT)
801 *
802 * NOTE: directory contents writers lock needs to be held before
803 * calling this routine.
804 */
805 int
806 sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
807 struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
808 sdev_node_state_t state)
809 {
810 int error = 0;
811 sdev_node_state_t node_state;
812 struct sdev_node *dv = NULL;
813
814 ASSERT(state != SDEV_ZOMBIE);
815 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
816
817 if (*newdv) {
818 dv = *newdv;
819 } else {
820 /* allocate and initialize a sdev_node */
821 if (ddv->sdev_state == SDEV_ZOMBIE) {
822 sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
823 ddv->sdev_path));
824 return (ENOENT);
825 }
826
827 error = sdev_nodeinit(ddv, nm, &dv, vap);
828 if (error != 0) {
829 sdcmn_err9(("sdev_mknode: error %d,"
830 " name %s can not be initialized\n",
831 error, nm));
832 return (error);
833 }
834 ASSERT(dv);
835
836 /* insert into the directory cache */
837 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
838 }
839
840 ASSERT(dv);
841 node_state = dv->sdev_state;
842 ASSERT(node_state != SDEV_ZOMBIE);
843
844 if (state == SDEV_READY) {
845 switch (node_state) {
846 case SDEV_INIT:
847 error = sdev_nodeready(dv, vap, avp, args, cred);
848 if (error) {
849 sdcmn_err9(("sdev_mknode: node %s can NOT"
850 " be transitioned into READY state, "
851 "error %d\n", nm, error));
852 }
853 break;
854 case SDEV_READY:
855 /*
856 * Do some sanity checking to make sure
857 * the existing sdev_node is what has been
858 * asked for.
859 */
860 error = sdev_node_check(dv, vap, args);
861 break;
862 default:
863 break;
864 }
865 }
866
867 if (!error) {
868 *newdv = dv;
869 ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
870 } else {
871 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE);
872 /*
873 * We created this node, it wasn't passed into us. Therefore it
874 * is up to us to delete it.
875 */
876 if (*newdv == NULL)
877 SDEV_SIMPLE_RELE(dv);
878 *newdv = NULL;
879 }
880
881 return (error);
882 }
883
884 /*
885 * convenient wrapper to change vp's ATIME, CTIME and MTIME
886 */
887 void
888 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
889 {
890 struct vattr attr;
891 timestruc_t now;
892 int err;
893
894 ASSERT(vp);
895 gethrestime(&now);
896 if (mask & AT_CTIME)
897 attr.va_ctime = now;
898 if (mask & AT_MTIME)
899 attr.va_mtime = now;
900 if (mask & AT_ATIME)
901 attr.va_atime = now;
902
903 attr.va_mask = (mask & AT_TIMES);
904 err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
905 if (err && (err != EROFS)) {
906 sdcmn_err(("update timestamps error %d\n", err));
907 }
908 }
909
910 /*
911 * the backing store vnode is released here
912 */
913 /*ARGSUSED1*/
914 void
915 sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
916 {
917 /* no references */
918 ASSERT(dv->sdev_nlink == 0);
919
920 if (dv->sdev_attrvp != NULLVP) {
921 VN_RELE(dv->sdev_attrvp);
922 /*
923 * reset the attrvp so that no more
924 * references can be made on this already
925 * vn_rele() vnode
926 */
927 dv->sdev_attrvp = NULLVP;
928 }
929
930 if (dv->sdev_attr != NULL) {
931 kmem_free(dv->sdev_attr, sizeof (struct vattr));
932 dv->sdev_attr = NULL;
933 }
934
935 if (dv->sdev_name != NULL) {
936 kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
937 dv->sdev_name = NULL;
938 }
939
940 if (dv->sdev_symlink != NULL) {
941 kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
942 dv->sdev_symlink = NULL;
943 }
944
945 if (dv->sdev_path) {
946 kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
947 dv->sdev_path = NULL;
948 }
949
950 if (!SDEV_IS_GLOBAL(dv))
951 sdev_prof_free(dv);
952
953 if (SDEVTOV(dv)->v_type == VDIR) {
954 ASSERT(SDEV_FIRST_ENTRY(dv) == NULL);
955 avl_destroy(&dv->sdev_entries);
956 }
957
958 mutex_destroy(&dv->sdev_lookup_lock);
959 cv_destroy(&dv->sdev_lookup_cv);
960
961 /* return node to initial state as per constructor */
962 (void) memset((void *)&dv->sdev_instance_data, 0,
963 sizeof (dv->sdev_instance_data));
964 vn_invalid(SDEVTOV(dv));
965 dv->sdev_private = NULL;
966 kmem_cache_free(sdev_node_cache, dv);
967 }
968
969 /*
970 * DIRECTORY CACHE lookup
971 */
972 struct sdev_node *
973 sdev_findbyname(struct sdev_node *ddv, char *nm)
974 {
975 struct sdev_node *dv;
976 struct sdev_node dvtmp;
977 avl_index_t where;
978
979 ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
980
981 dvtmp.sdev_name = nm;
982 dv = avl_find(&ddv->sdev_entries, &dvtmp, &where);
983 if (dv) {
984 ASSERT(dv->sdev_dotdot == ddv);
985 ASSERT(strcmp(dv->sdev_name, nm) == 0);
986 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
987 SDEV_HOLD(dv);
988 return (dv);
989 }
990 return (NULL);
991 }
992
993 /*
994 * Inserts a new sdev_node in a parent directory
995 */
996 void
997 sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
998 {
999 avl_index_t where;
1000
1001 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1002 ASSERT(SDEVTOV(ddv)->v_type == VDIR);
1003 ASSERT(ddv->sdev_nlink >= 2);
1004 ASSERT(dv->sdev_nlink == 0);
1005 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1006
1007 dv->sdev_dotdot = ddv;
1008 VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL);
1009 avl_insert(&ddv->sdev_entries, dv, where);
1010 ddv->sdev_nlink++;
1011 }
1012
1013 /*
1014 * The following check is needed because while sdev_nodes are linked
1015 * in SDEV_INIT state, they have their link counts incremented only
1016 * in SDEV_READY state.
1017 */
1018 static void
1019 decr_link(struct sdev_node *dv)
1020 {
1021 VERIFY(RW_WRITE_HELD(&dv->sdev_contents));
1022 if (dv->sdev_state != SDEV_INIT) {
1023 VERIFY(dv->sdev_nlink >= 1);
1024 dv->sdev_nlink--;
1025 } else {
1026 VERIFY(dv->sdev_nlink == 0);
1027 }
1028 }
1029
1030 /*
1031 * Delete an existing dv from directory cache
1032 *
1033 * In the case of a node is still held by non-zero reference count, the node is
1034 * put into ZOMBIE state. The node is always unlinked from its parent, but it is
1035 * not destroyed via sdev_inactive until its reference count reaches "0".
1036 */
1037 static void
1038 sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
1039 {
1040 struct vnode *vp;
1041 sdev_node_state_t os;
1042
1043 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1044
1045 vp = SDEVTOV(dv);
1046 mutex_enter(&vp->v_lock);
1047 rw_enter(&dv->sdev_contents, RW_WRITER);
1048 os = dv->sdev_state;
1049 ASSERT(os != SDEV_ZOMBIE);
1050 dv->sdev_state = SDEV_ZOMBIE;
1051
1052 /*
1053 * unlink ourselves from the parent directory now to take care of the ..
1054 * link. However, if we're a directory, we don't remove our reference to
1055 * ourself eg. '.' until we are torn down in the inactive callback.
1056 */
1057 decr_link(ddv);
1058 avl_remove(&ddv->sdev_entries, dv);
1059 /*
1060 * sdev_inactive expects nodes to have a link to themselves when we're
1061 * tearing them down. If we're transitioning from the initial state to
1062 * zombie and not via ready, then we're not going to have this link that
1063 * comes from the node being ready. As a result, we need to increment
1064 * our link count by one to account for this.
1065 */
1066 if (os == SDEV_INIT && dv->sdev_nlink == 0)
1067 dv->sdev_nlink++;
1068 rw_exit(&dv->sdev_contents);
1069 mutex_exit(&vp->v_lock);
1070 }
1071
1072 /*
1073 * check if the source is in the path of the target
1074 *
1075 * source and target are different
1076 */
1077 /*ARGSUSED2*/
1078 static int
1079 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
1080 {
1081 int error = 0;
1082 struct sdev_node *dotdot, *dir;
1083
1084 dotdot = tdv->sdev_dotdot;
1085 ASSERT(dotdot);
1086
1087 /* fs root */
1088 if (dotdot == tdv) {
1089 return (0);
1090 }
1091
1092 for (;;) {
1093 /*
1094 * avoid error cases like
1095 * mv a a/b
1096 * mv a a/b/c
1097 * etc.
1098 */
1099 if (dotdot == sdv) {
1100 error = EINVAL;
1101 break;
1102 }
1103
1104 dir = dotdot;
1105 dotdot = dir->sdev_dotdot;
1106
1107 /* done checking because root is reached */
1108 if (dir == dotdot) {
1109 break;
1110 }
1111 }
1112 return (error);
1113 }
1114
1115 int
1116 sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
1117 struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
1118 struct cred *cred)
1119 {
1120 int error = 0;
1121 struct vnode *ovp = SDEVTOV(odv);
1122 struct vnode *nvp;
1123 struct vattr vattr;
1124 int doingdir = (ovp->v_type == VDIR);
1125 char *link = NULL;
1126 int samedir = (oddv == nddv) ? 1 : 0;
1127 int bkstore = 0;
1128 struct sdev_node *idv = NULL;
1129 struct sdev_node *ndv = NULL;
1130 timestruc_t now;
1131
1132 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1133 error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
1134 if (error)
1135 return (error);
1136
1137 if (!samedir)
1138 rw_enter(&oddv->sdev_contents, RW_WRITER);
1139 rw_enter(&nddv->sdev_contents, RW_WRITER);
1140
1141 /*
1142 * the source may have been deleted by another thread before
1143 * we gets here.
1144 */
1145 if (odv->sdev_state != SDEV_READY) {
1146 error = ENOENT;
1147 goto err_out;
1148 }
1149
1150 if (doingdir && (odv == nddv)) {
1151 error = EINVAL;
1152 goto err_out;
1153 }
1154
1155 /*
1156 * If renaming a directory, and the parents are different (".." must be
1157 * changed) then the source dir must not be in the dir hierarchy above
1158 * the target since it would orphan everything below the source dir.
1159 */
1160 if (doingdir && (oddv != nddv)) {
1161 error = sdev_checkpath(odv, nddv, cred);
1162 if (error)
1163 goto err_out;
1164 }
1165
1166 /* fix the source for a symlink */
1167 if (vattr.va_type == VLNK) {
1168 if (odv->sdev_symlink == NULL) {
1169 error = sdev_follow_link(odv);
1170 if (error) {
1171 /*
1172 * The underlying symlink doesn't exist. This
1173 * node probably shouldn't even exist. While
1174 * it's a bit jarring to consumers, we're going
1175 * to remove the node from /dev.
1176 */
1177 if (SDEV_IS_PERSIST((*ndvp)))
1178 bkstore = 1;
1179 sdev_dirdelete(oddv, odv);
1180 if (bkstore) {
1181 ASSERT(nddv->sdev_attrvp);
1182 error = VOP_REMOVE(nddv->sdev_attrvp,
1183 nnm, cred, NULL, 0);
1184 if (error)
1185 goto err_out;
1186 }
1187 error = ENOENT;
1188 goto err_out;
1189 }
1190 }
1191 ASSERT(odv->sdev_symlink);
1192 link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
1193 }
1194
1195 /* destination existing */
1196 if (*ndvp) {
1197 nvp = SDEVTOV(*ndvp);
1198 ASSERT(nvp);
1199
1200 /* handling renaming to itself */
1201 if (odv == *ndvp) {
1202 error = 0;
1203 goto err_out;
1204 }
1205
1206 if (nvp->v_type == VDIR) {
1207 if (!doingdir) {
1208 error = EISDIR;
1209 goto err_out;
1210 }
1211
1212 if (vn_vfswlock(nvp)) {
1213 error = EBUSY;
1214 goto err_out;
1215 }
1216
1217 if (vn_mountedvfs(nvp) != NULL) {
1218 vn_vfsunlock(nvp);
1219 error = EBUSY;
1220 goto err_out;
1221 }
1222
1223 /* in case dir1 exists in dir2 and "mv dir1 dir2" */
1224 if ((*ndvp)->sdev_nlink > 2) {
1225 vn_vfsunlock(nvp);
1226 error = EEXIST;
1227 goto err_out;
1228 }
1229 vn_vfsunlock(nvp);
1230
1231 /*
1232 * We did not place the hold on *ndvp, so even though
1233 * we're deleting the node, we should not get rid of our
1234 * reference.
1235 */
1236 sdev_dirdelete(nddv, *ndvp);
1237 *ndvp = NULL;
1238 ASSERT(nddv->sdev_attrvp);
1239 error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
1240 nddv->sdev_attrvp, cred, NULL, 0);
1241 if (error)
1242 goto err_out;
1243 } else {
1244 if (doingdir) {
1245 error = ENOTDIR;
1246 goto err_out;
1247 }
1248
1249 if (SDEV_IS_PERSIST((*ndvp))) {
1250 bkstore = 1;
1251 }
1252
1253 /*
1254 * Get rid of the node from the directory cache note.
1255 * Don't forget that it's not up to us to remove the vn
1256 * ref on the sdev node, as we did not place it.
1257 */
1258 sdev_dirdelete(nddv, *ndvp);
1259 *ndvp = NULL;
1260 if (bkstore) {
1261 ASSERT(nddv->sdev_attrvp);
1262 error = VOP_REMOVE(nddv->sdev_attrvp,
1263 nnm, cred, NULL, 0);
1264 if (error)
1265 goto err_out;
1266 }
1267 }
1268 }
1269
1270 /*
1271 * make a fresh node from the source attrs
1272 */
1273 ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
1274 error = sdev_mknode(nddv, nnm, ndvp, &vattr,
1275 NULL, (void *)link, cred, SDEV_READY);
1276
1277 if (link != NULL) {
1278 kmem_free(link, strlen(link) + 1);
1279 link = NULL;
1280 }
1281
1282 if (error)
1283 goto err_out;
1284 ASSERT(*ndvp);
1285 ASSERT((*ndvp)->sdev_state == SDEV_READY);
1286
1287 /* move dir contents */
1288 if (doingdir) {
1289 for (idv = SDEV_FIRST_ENTRY(odv); idv;
1290 idv = SDEV_NEXT_ENTRY(odv, idv)) {
1291 SDEV_HOLD(idv);
1292 error = sdev_rnmnode(odv, idv,
1293 (struct sdev_node *)(*ndvp), &ndv,
1294 idv->sdev_name, cred);
1295 SDEV_RELE(idv);
1296 if (error)
1297 goto err_out;
1298 ndv = NULL;
1299 }
1300 }
1301
1302 if ((*ndvp)->sdev_attrvp) {
1303 sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
1304 AT_CTIME|AT_ATIME);
1305 } else {
1306 ASSERT((*ndvp)->sdev_attr);
1307 gethrestime(&now);
1308 (*ndvp)->sdev_attr->va_ctime = now;
1309 (*ndvp)->sdev_attr->va_atime = now;
1310 }
1311
1312 if (nddv->sdev_attrvp) {
1313 sdev_update_timestamps(nddv->sdev_attrvp, kcred,
1314 AT_MTIME|AT_ATIME);
1315 } else {
1316 ASSERT(nddv->sdev_attr);
1317 gethrestime(&now);
1318 nddv->sdev_attr->va_mtime = now;
1319 nddv->sdev_attr->va_atime = now;
1320 }
1321 rw_exit(&nddv->sdev_contents);
1322 if (!samedir)
1323 rw_exit(&oddv->sdev_contents);
1324
1325 SDEV_RELE(*ndvp);
1326 return (error);
1327
1328 err_out:
1329 if (link != NULL) {
1330 kmem_free(link, strlen(link) + 1);
1331 link = NULL;
1332 }
1333
1334 rw_exit(&nddv->sdev_contents);
1335 if (!samedir)
1336 rw_exit(&oddv->sdev_contents);
1337 return (error);
1338 }
1339
1340 /*
1341 * Merge sdev_node specific information into an attribute structure.
1342 *
1343 * note: sdev_node is not locked here
1344 */
1345 void
1346 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
1347 {
1348 struct vnode *vp = SDEVTOV(dv);
1349
1350 vap->va_nlink = dv->sdev_nlink;
1351 vap->va_nodeid = dv->sdev_ino;
1352 vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
1353 vap->va_type = vp->v_type;
1354
1355 if (vp->v_type == VDIR) {
1356 vap->va_rdev = 0;
1357 vap->va_fsid = vp->v_rdev;
1358 } else if (vp->v_type == VLNK) {
1359 vap->va_rdev = 0;
1360 vap->va_mode &= ~S_IFMT;
1361 vap->va_mode |= S_IFLNK;
1362 } else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
1363 vap->va_rdev = vp->v_rdev;
1364 vap->va_mode &= ~S_IFMT;
1365 if (vap->va_type == VCHR)
1366 vap->va_mode |= S_IFCHR;
1367 else
1368 vap->va_mode |= S_IFBLK;
1369 } else {
1370 vap->va_rdev = 0;
1371 }
1372 }
1373
1374 struct vattr *
1375 sdev_getdefault_attr(enum vtype type)
1376 {
1377 if (type == VDIR)
1378 return (&sdev_vattr_dir);
1379 else if (type == VCHR)
1380 return (&sdev_vattr_chr);
1381 else if (type == VBLK)
1382 return (&sdev_vattr_blk);
1383 else if (type == VLNK)
1384 return (&sdev_vattr_lnk);
1385 else
1386 return (NULL);
1387 }
1388 int
1389 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
1390 {
1391 int rv = 0;
1392 struct vnode *vp = SDEVTOV(dv);
1393
1394 switch (vp->v_type) {
1395 case VCHR:
1396 case VBLK:
1397 /*
1398 * If vnode is a device, return special vnode instead
1399 * (though it knows all about -us- via sp->s_realvp)
1400 */
1401 *vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
1402 VN_RELE(vp);
1403 if (*vpp == NULLVP)
1404 rv = ENOSYS;
1405 break;
1406 default: /* most types are returned as is */
1407 *vpp = vp;
1408 break;
1409 }
1410 return (rv);
1411 }
1412
1413 /*
1414 * junction between devname and root file system, e.g. ufs
1415 */
1416 int
1417 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
1418 {
1419 struct vnode *rdvp = ddv->sdev_attrvp;
1420 int rval = 0;
1421
1422 ASSERT(rdvp);
1423
1424 rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
1425 NULL);
1426 return (rval);
1427 }
1428
1429 static int
1430 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
1431 {
1432 struct sdev_node *dv = NULL;
1433 char *nm;
1434 struct vnode *dirvp;
1435 int error;
1436 vnode_t *vp;
1437 int eof;
1438 struct iovec iov;
1439 struct uio uio;
1440 struct dirent64 *dp;
1441 dirent64_t *dbuf;
1442 size_t dbuflen;
1443 struct vattr vattr;
1444 char *link = NULL;
1445
1446 if (ddv->sdev_attrvp == NULL)
1447 return (0);
1448 if (!(ddv->sdev_flags & SDEV_BUILD))
1449 return (0);
1450
1451 dirvp = ddv->sdev_attrvp;
1452 VN_HOLD(dirvp);
1453 dbuf = kmem_zalloc(dlen, KM_SLEEP);
1454
1455 uio.uio_iov = &iov;
1456 uio.uio_iovcnt = 1;
1457 uio.uio_segflg = UIO_SYSSPACE;
1458 uio.uio_fmode = 0;
1459 uio.uio_extflg = UIO_COPY_CACHED;
1460 uio.uio_loffset = 0;
1461 uio.uio_llimit = MAXOFFSET_T;
1462
1463 eof = 0;
1464 error = 0;
1465 while (!error && !eof) {
1466 uio.uio_resid = dlen;
1467 iov.iov_base = (char *)dbuf;
1468 iov.iov_len = dlen;
1469 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1470 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1471 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1472
1473 dbuflen = dlen - uio.uio_resid;
1474 if (error || dbuflen == 0)
1475 break;
1476
1477 if (!(ddv->sdev_flags & SDEV_BUILD))
1478 break;
1479
1480 for (dp = dbuf; ((intptr_t)dp <
1481 (intptr_t)dbuf + dbuflen);
1482 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1483 nm = dp->d_name;
1484
1485 if (strcmp(nm, ".") == 0 ||
1486 strcmp(nm, "..") == 0)
1487 continue;
1488
1489 vp = NULLVP;
1490 dv = sdev_cache_lookup(ddv, nm);
1491 if (dv) {
1492 VERIFY(dv->sdev_state != SDEV_ZOMBIE);
1493 SDEV_SIMPLE_RELE(dv);
1494 continue;
1495 }
1496
1497 /* refill the cache if not already */
1498 error = devname_backstore_lookup(ddv, nm, &vp);
1499 if (error)
1500 continue;
1501
1502 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1503 error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
1504 if (error)
1505 continue;
1506
1507 if (vattr.va_type == VLNK) {
1508 error = sdev_getlink(vp, &link);
1509 if (error) {
1510 continue;
1511 }
1512 ASSERT(link != NULL);
1513 }
1514
1515 if (!rw_tryupgrade(&ddv->sdev_contents)) {
1516 rw_exit(&ddv->sdev_contents);
1517 rw_enter(&ddv->sdev_contents, RW_WRITER);
1518 }
1519 error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
1520 cred, SDEV_READY);
1521 rw_downgrade(&ddv->sdev_contents);
1522
1523 if (link != NULL) {
1524 kmem_free(link, strlen(link) + 1);
1525 link = NULL;
1526 }
1527
1528 if (!error) {
1529 ASSERT(dv);
1530 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1531 SDEV_SIMPLE_RELE(dv);
1532 }
1533 vp = NULL;
1534 dv = NULL;
1535 }
1536 }
1537
1538 done:
1539 VN_RELE(dirvp);
1540 kmem_free(dbuf, dlen);
1541
1542 return (error);
1543 }
1544
1545 void
1546 sdev_filldir_dynamic(struct sdev_node *ddv)
1547 {
1548 int error;
1549 int i;
1550 struct vattr vattr;
1551 struct vattr *vap = &vattr;
1552 char *nm = NULL;
1553 struct sdev_node *dv = NULL;
1554
1555 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1556 ASSERT((ddv->sdev_flags & SDEV_BUILD));
1557
1558 *vap = *sdev_getdefault_attr(VDIR); /* note structure copy here */
1559 gethrestime(&vap->va_atime);
1560 vap->va_mtime = vap->va_atime;
1561 vap->va_ctime = vap->va_atime;
1562 for (i = 0; vtab[i].vt_name != NULL; i++) {
1563 /*
1564 * This early, we may be in a read-only /dev environment: leave
1565 * the creation of any nodes we'd attempt to persist to
1566 * devfsadm. Because /dev itself is normally persistent, any
1567 * node which is not marked dynamic will end up being marked
1568 * persistent. However, some nodes are both dynamic and
1569 * persistent, mostly lofi and rlofi, so we need to be careful
1570 * in our check.
1571 */
1572 if ((vtab[i].vt_flags & SDEV_PERSIST) ||
1573 !(vtab[i].vt_flags & SDEV_DYNAMIC))
1574 continue;
1575 nm = vtab[i].vt_name;
1576 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1577 dv = NULL;
1578 error = sdev_mknode(ddv, nm, &dv, vap, NULL,
1579 NULL, kcred, SDEV_READY);
1580 if (error) {
1581 cmn_err(CE_WARN, "%s/%s: error %d\n",
1582 ddv->sdev_name, nm, error);
1583 } else {
1584 ASSERT(dv);
1585 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1586 SDEV_SIMPLE_RELE(dv);
1587 }
1588 }
1589 }
1590
1591 /*
1592 * Creating a backing store entry based on sdev_attr.
1593 * This is called either as part of node creation in a persistent directory
1594 * or from setattr/setsecattr to persist access attributes across reboot.
1595 */
1596 int
1597 sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
1598 {
1599 int error = 0;
1600 struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
1601 struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
1602 struct vattr *vap = dv->sdev_attr;
1603 char *nm = dv->sdev_name;
1604 struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;
1605
1606 ASSERT(dv && dv->sdev_name && rdvp);
1607 ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);
1608
1609 lookup:
1610 /* try to find it in the backing store */
1611 error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
1612 NULL);
1613 if (error == 0) {
1614 if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
1615 VN_HOLD(rrvp);
1616 VN_RELE(*rvp);
1617 *rvp = rrvp;
1618 }
1619
1620 kmem_free(dv->sdev_attr, sizeof (vattr_t));
1621 dv->sdev_attr = NULL;
1622 dv->sdev_attrvp = *rvp;
1623 return (0);
1624 }
1625
1626 /* let's try to persist the node */
1627 gethrestime(&vap->va_atime);
1628 vap->va_mtime = vap->va_atime;
1629 vap->va_ctime = vap->va_atime;
1630 vap->va_mask |= AT_TYPE|AT_MODE;
1631 switch (vap->va_type) {
1632 case VDIR:
1633 error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
1634 sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
1635 (void *)(*rvp), error));
1636 if (!error)
1637 VN_RELE(*rvp);
1638 break;
1639 case VCHR:
1640 case VBLK:
1641 case VREG:
1642 case VDOOR:
1643 error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
1644 rvp, cred, 0, NULL, NULL);
1645 sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
1646 (void *)(*rvp), error));
1647 if (!error)
1648 VN_RELE(*rvp);
1649 break;
1650 case VLNK:
1651 ASSERT(dv->sdev_symlink);
1652 error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred,
1653 NULL, 0);
1654 sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
1655 error));
1656 break;
1657 default:
1658 cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
1659 "create\n", nm);
1660 /*NOTREACHED*/
1661 }
1662
1663 /* go back to lookup to factor out spec node and set attrvp */
1664 if (error == 0)
1665 goto lookup;
1666
1667 sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error));
1668 return (error);
1669 }
1670
1671 static void
1672 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
1673 {
1674 struct sdev_node *dup = NULL;
1675
1676 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1677 if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
1678 sdev_direnter(ddv, *dv);
1679 } else {
1680 VERIFY(dup->sdev_state != SDEV_ZOMBIE);
1681 SDEV_SIMPLE_RELE(*dv);
1682 sdev_nodedestroy(*dv, 0);
1683 *dv = dup;
1684 }
1685 }
1686
1687 static void
1688 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
1689 {
1690 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1691 sdev_dirdelete(ddv, *dv);
1692 }
1693
1694 /*
1695 * update the in-core directory cache
1696 */
1697 void
1698 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
1699 sdev_cache_ops_t ops)
1700 {
1701 ASSERT((SDEV_HELD(*dv)));
1702
1703 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1704 switch (ops) {
1705 case SDEV_CACHE_ADD:
1706 sdev_cache_add(ddv, dv, nm);
1707 break;
1708 case SDEV_CACHE_DELETE:
1709 sdev_cache_delete(ddv, dv);
1710 break;
1711 default:
1712 break;
1713 }
1714 }
1715
1716 /*
1717 * retrieve the named entry from the directory cache
1718 */
1719 struct sdev_node *
1720 sdev_cache_lookup(struct sdev_node *ddv, char *nm)
1721 {
1722 struct sdev_node *dv = NULL;
1723
1724 ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1725 dv = sdev_findbyname(ddv, nm);
1726
1727 return (dv);
1728 }
1729
1730 /*
1731 * Implicit reconfig for nodes constructed by a link generator
1732 * Start devfsadm if needed, or if devfsadm is in progress,
1733 * prepare to block on devfsadm either completing or
1734 * constructing the desired node. As devfsadmd is global
1735 * in scope, constructing all necessary nodes, we only
1736 * need to initiate it once.
1737 */
1738 static int
1739 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
1740 {
1741 int error = 0;
1742
1743 if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
1744 sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
1745 ddv->sdev_name, nm, devfsadm_state));
1746 mutex_enter(&dv->sdev_lookup_lock);
1747 SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
1748 mutex_exit(&dv->sdev_lookup_lock);
1749 error = 0;
1750 } else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
1751 sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
1752 ddv->sdev_name, nm, devfsadm_state));
1753
1754 sdev_devfsadmd_thread(ddv, dv, kcred);
1755 mutex_enter(&dv->sdev_lookup_lock);
1756 SDEV_BLOCK_OTHERS(dv,
1757 (SDEV_LOOKUP | SDEV_LGWAITING));
1758 mutex_exit(&dv->sdev_lookup_lock);
1759 error = 0;
1760 } else {
1761 error = -1;
1762 }
1763
1764 return (error);
1765 }
1766
1767 /*
1768 * Support for specialized device naming construction mechanisms
1769 */
1770 static int
1771 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1772 int (*callback)(struct sdev_node *, char *, void **, struct cred *,
1773 void *, char *), int flags, struct cred *cred)
1774 {
1775 int rv = 0;
1776 char *physpath = NULL;
1777 struct vattr vattr;
1778 struct vattr *vap = &vattr;
1779 struct sdev_node *dv = NULL;
1780
1781 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1782 if (flags & SDEV_VLINK) {
1783 physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1784 rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
1785 NULL);
1786 if (rv) {
1787 kmem_free(physpath, MAXPATHLEN);
1788 return (-1);
1789 }
1790
1791 *vap = *sdev_getdefault_attr(VLNK); /* structure copy */
1792 vap->va_size = strlen(physpath);
1793 gethrestime(&vap->va_atime);
1794 vap->va_mtime = vap->va_atime;
1795 vap->va_ctime = vap->va_atime;
1796
1797 rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
1798 (void *)physpath, cred, SDEV_READY);
1799 kmem_free(physpath, MAXPATHLEN);
1800 if (rv)
1801 return (rv);
1802 } else if (flags & SDEV_VATTR) {
1803 /*
1804 * /dev/pts
1805 *
1806 * callback is responsible to set the basic attributes,
1807 * e.g. va_type/va_uid/va_gid/
1808 * dev_t if VCHR or VBLK/
1809 */
1810 ASSERT(callback);
1811 rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
1812 if (rv) {
1813 sdcmn_err3(("devname_lookup_func: SDEV_NONE "
1814 "callback failed \n"));
1815 return (-1);
1816 }
1817
1818 rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
1819 cred, SDEV_READY);
1820
1821 if (rv)
1822 return (rv);
1823
1824 } else {
1825 impossible(("lookup: %s/%s by %s not supported (%d)\n",
1826 SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
1827 __LINE__));
1828 rv = -1;
1829 }
1830
1831 *dvp = dv;
1832 return (rv);
1833 }
1834
/*
 * Return 1 when exec_name is exactly "devfsadm", 0 otherwise.
 *
 * note: because devfsadmd -> /usr/sbin/devfsadm
 * it is safe to use "devfsadm" to capture the lookups
 * from devfsadm and its daemon version.
 */
static int
is_devfsadm_thread(char *exec_name)
{
	return ((strcmp(exec_name, "devfsadm") == 0) ? 1 : 0);
}
1847
1848 /*
1849 * Lookup Order:
1850 * sdev_node cache;
1851 * backing store (SDEV_PERSIST);
1852 * DBNR: a. dir_ops implemented in the loadable modules;
1853 * b. vnode ops in vtab.
1854 */
1855 int
1856 devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
1857 struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
1858 struct cred *, void *, char *), int flags)
1859 {
1860 int rv = 0, nmlen;
1861 struct vnode *rvp = NULL;
1862 struct sdev_node *dv = NULL;
1863 int retried = 0;
1864 int error = 0;
1865 struct vattr vattr;
1866 char *lookup_thread = curproc->p_user.u_comm;
1867 int failed_flags = 0;
1868 int (*vtor)(struct sdev_node *) = NULL;
1869 int state;
1870 int parent_state;
1871 char *link = NULL;
1872
1873 if (SDEVTOV(ddv)->v_type != VDIR)
1874 return (ENOTDIR);
1875
1876 /*
1877 * Empty name or ., return node itself.
1878 */
1879 nmlen = strlen(nm);
1880 if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
1881 *vpp = SDEVTOV(ddv);
1882 VN_HOLD(*vpp);
1883 return (0);
1884 }
1885
1886 /*
1887 * .., return the parent directory
1888 */
1889 if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
1890 *vpp = SDEVTOV(ddv->sdev_dotdot);
1891 VN_HOLD(*vpp);
1892 return (0);
1893 }
1894
1895 rw_enter(&ddv->sdev_contents, RW_READER);
1896 if (ddv->sdev_flags & SDEV_VTOR) {
1897 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
1898 ASSERT(vtor);
1899 }
1900
1901 tryagain:
1902 /*
1903 * (a) directory cache lookup:
1904 */
1905 ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1906 parent_state = ddv->sdev_state;
1907 dv = sdev_cache_lookup(ddv, nm);
1908 if (dv) {
1909 state = dv->sdev_state;
1910 switch (state) {
1911 case SDEV_INIT:
1912 if (is_devfsadm_thread(lookup_thread))
1913 break;
1914
1915 /* ZOMBIED parent won't allow node creation */
1916 if (parent_state == SDEV_ZOMBIE) {
1917 SD_TRACE_FAILED_LOOKUP(ddv, nm,
1918 retried);
1919 goto nolock_notfound;
1920 }
1921
1922 mutex_enter(&dv->sdev_lookup_lock);
1923 /* compensate the threads started after devfsadm */
1924 if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
1925 !(SDEV_IS_LOOKUP(dv)))
1926 SDEV_BLOCK_OTHERS(dv,
1927 (SDEV_LOOKUP | SDEV_LGWAITING));
1928
1929 if (SDEV_IS_LOOKUP(dv)) {
1930 failed_flags |= SLF_REBUILT;
1931 rw_exit(&ddv->sdev_contents);
1932 error = sdev_wait4lookup(dv, SDEV_LOOKUP);
1933 mutex_exit(&dv->sdev_lookup_lock);
1934 rw_enter(&ddv->sdev_contents, RW_READER);
1935
1936 if (error != 0) {
1937 SD_TRACE_FAILED_LOOKUP(ddv, nm,
1938 retried);
1939 goto nolock_notfound;
1940 }
1941
1942 state = dv->sdev_state;
1943 if (state == SDEV_INIT) {
1944 SD_TRACE_FAILED_LOOKUP(ddv, nm,
1945 retried);
1946 goto nolock_notfound;
1947 } else if (state == SDEV_READY) {
1948 goto found;
1949 } else if (state == SDEV_ZOMBIE) {
1950 rw_exit(&ddv->sdev_contents);
1951 SD_TRACE_FAILED_LOOKUP(ddv, nm,
1952 retried);
1953 SDEV_RELE(dv);
1954 goto lookup_failed;
1955 }
1956 } else {
1957 mutex_exit(&dv->sdev_lookup_lock);
1958 }
1959 break;
1960 case SDEV_READY:
1961 goto found;
1962 case SDEV_ZOMBIE:
1963 rw_exit(&ddv->sdev_contents);
1964 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1965 SDEV_RELE(dv);
1966 goto lookup_failed;
1967 default:
1968 rw_exit(&ddv->sdev_contents);
1969 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1970 sdev_lookup_failed(ddv, nm, failed_flags);
1971 *vpp = NULLVP;
1972 return (ENOENT);
1973 }
1974 }
1975 ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1976
1977 /*
1978 * ZOMBIED parent does not allow new node creation.
1979 * bail out early
1980 */
1981 if (parent_state == SDEV_ZOMBIE) {
1982 rw_exit(&ddv->sdev_contents);
1983 *vpp = NULLVP;
1984 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1985 return (ENOENT);
1986 }
1987
1988 /*
1989 * (b0): backing store lookup
1990 * SDEV_PERSIST is default except:
1991 * 1) pts nodes
1992 * 2) non-chmod'ed local nodes
1993 * 3) zvol nodes
1994 */
1995 if (SDEV_IS_PERSIST(ddv)) {
1996 error = devname_backstore_lookup(ddv, nm, &rvp);
1997
1998 if (!error) {
1999
2000 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
2001 error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL);
2002 if (error) {
2003 rw_exit(&ddv->sdev_contents);
2004 if (dv)
2005 SDEV_RELE(dv);
2006 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2007 sdev_lookup_failed(ddv, nm, failed_flags);
2008 *vpp = NULLVP;
2009 return (ENOENT);
2010 }
2011
2012 if (vattr.va_type == VLNK) {
2013 error = sdev_getlink(rvp, &link);
2014 if (error) {
2015 rw_exit(&ddv->sdev_contents);
2016 if (dv)
2017 SDEV_RELE(dv);
2018 SD_TRACE_FAILED_LOOKUP(ddv, nm,
2019 retried);
2020 sdev_lookup_failed(ddv, nm,
2021 failed_flags);
2022 *vpp = NULLVP;
2023 return (ENOENT);
2024 }
2025 ASSERT(link != NULL);
2026 }
2027
2028 if (!rw_tryupgrade(&ddv->sdev_contents)) {
2029 rw_exit(&ddv->sdev_contents);
2030 rw_enter(&ddv->sdev_contents, RW_WRITER);
2031 }
2032 error = sdev_mknode(ddv, nm, &dv, &vattr,
2033 rvp, link, cred, SDEV_READY);
2034 rw_downgrade(&ddv->sdev_contents);
2035
2036 if (link != NULL) {
2037 kmem_free(link, strlen(link) + 1);
2038 link = NULL;
2039 }
2040
2041 if (error) {
2042 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2043 rw_exit(&ddv->sdev_contents);
2044 if (dv)
2045 SDEV_RELE(dv);
2046 goto lookup_failed;
2047 } else {
2048 goto found;
2049 }
2050 } else if (retried) {
2051 rw_exit(&ddv->sdev_contents);
2052 sdcmn_err3(("retry of lookup of %s/%s: failed\n",
2053 ddv->sdev_name, nm));
2054 if (dv)
2055 SDEV_RELE(dv);
2056 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2057 sdev_lookup_failed(ddv, nm, failed_flags);
2058 *vpp = NULLVP;
2059 return (ENOENT);
2060 }
2061 }
2062
2063 lookup_create_node:
2064 /* first thread that is doing the lookup on this node */
2065 if (callback) {
2066 ASSERT(dv == NULL);
2067 if (!rw_tryupgrade(&ddv->sdev_contents)) {
2068 rw_exit(&ddv->sdev_contents);
2069 rw_enter(&ddv->sdev_contents, RW_WRITER);
2070 }
2071 error = sdev_call_dircallback(ddv, &dv, nm, callback,
2072 flags, cred);
2073 rw_downgrade(&ddv->sdev_contents);
2074 if (error == 0) {
2075 goto found;
2076 } else {
2077 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2078 rw_exit(&ddv->sdev_contents);
2079 goto lookup_failed;
2080 }
2081 }
2082 if (!dv) {
2083 if (!rw_tryupgrade(&ddv->sdev_contents)) {
2084 rw_exit(&ddv->sdev_contents);
2085 rw_enter(&ddv->sdev_contents, RW_WRITER);
2086 }
2087 error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
2088 cred, SDEV_INIT);
2089 if (!dv) {
2090 rw_exit(&ddv->sdev_contents);
2091 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2092 sdev_lookup_failed(ddv, nm, failed_flags);
2093 *vpp = NULLVP;
2094 return (ENOENT);
2095 }
2096 rw_downgrade(&ddv->sdev_contents);
2097 }
2098
2099 /*
2100 * (b1) invoking devfsadm once per life time for devfsadm nodes
2101 */
2102 ASSERT(SDEV_HELD(dv));
2103
2104 if (SDEV_IS_NO_NCACHE(dv))
2105 failed_flags |= SLF_NO_NCACHE;
2106 if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
2107 SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
2108 ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
2109 ASSERT(SDEV_HELD(dv));
2110 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2111 goto nolock_notfound;
2112 }
2113
2114 /*
2115 * filter out known non-existent devices recorded
2116 * during initial reconfiguration boot for which
2117 * reconfig should not be done and lookup may
2118 * be short-circuited now.
2119 */
2120 if (sdev_lookup_filter(ddv, nm)) {
2121 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2122 goto nolock_notfound;
2123 }
2124
2125 /* bypassing devfsadm internal nodes */
2126 if (is_devfsadm_thread(lookup_thread)) {
2127 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2128 goto nolock_notfound;
2129 }
2130
2131 if (sdev_reconfig_disable) {
2132 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2133 goto nolock_notfound;
2134 }
2135
2136 error = sdev_call_devfsadmd(ddv, dv, nm);
2137 if (error == 0) {
2138 sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
2139 ddv->sdev_name, nm, curproc->p_user.u_comm));
2140 if (sdev_reconfig_verbose) {
2141 cmn_err(CE_CONT,
2142 "?lookup of %s/%s by %s: reconfig\n",
2143 ddv->sdev_name, nm, curproc->p_user.u_comm);
2144 }
2145 retried = 1;
2146 failed_flags |= SLF_REBUILT;
2147 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
2148 SDEV_SIMPLE_RELE(dv);
2149 goto tryagain;
2150 } else {
2151 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2152 goto nolock_notfound;
2153 }
2154
2155 found:
2156 ASSERT(dv->sdev_state == SDEV_READY);
2157 if (vtor) {
2158 /*
2159 * Check validity of returned node
2160 */
2161 switch (vtor(dv)) {
2162 case SDEV_VTOR_VALID:
2163 break;
2164 case SDEV_VTOR_STALE:
2165 /*
2166 * The name exists, but the cache entry is
2167 * stale and needs to be re-created.
2168 */
2169 ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2170 if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
2171 rw_exit(&ddv->sdev_contents);
2172 rw_enter(&ddv->sdev_contents, RW_WRITER);
2173 }
2174 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE);
2175 rw_downgrade(&ddv->sdev_contents);
2176 SDEV_RELE(dv);
2177 dv = NULL;
2178 goto lookup_create_node;
2179 /* FALLTHRU */
2180 case SDEV_VTOR_INVALID:
2181 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2182 sdcmn_err7(("lookup: destroy invalid "
2183 "node: %s(%p)\n", dv->sdev_name, (void *)dv));
2184 goto nolock_notfound;
2185 case SDEV_VTOR_SKIP:
2186 sdcmn_err7(("lookup: node not applicable - "
2187 "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
2188 rw_exit(&ddv->sdev_contents);
2189 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2190 SDEV_RELE(dv);
2191 goto lookup_failed;
2192 default:
2193 cmn_err(CE_PANIC,
2194 "dev fs: validator failed: %s(%p)\n",
2195 dv->sdev_name, (void *)dv);
2196 break;
2197 }
2198 }
2199
2200 rw_exit(&ddv->sdev_contents);
2201 rv = sdev_to_vp(dv, vpp);
2202 sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
2203 "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
2204 dv->sdev_state, nm, rv));
2205 return (rv);
2206
2207 nolock_notfound:
2208 /*
2209 * Destroy the node that is created for synchronization purposes.
2210 */
2211 sdcmn_err3(("devname_lookup_func: %s with state %d\n",
2212 nm, dv->sdev_state));
2213 ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2214 if (dv->sdev_state == SDEV_INIT) {
2215 if (!rw_tryupgrade(&ddv->sdev_contents)) {
2216 rw_exit(&ddv->sdev_contents);
2217 rw_enter(&ddv->sdev_contents, RW_WRITER);
2218 }
2219
2220 /*
2221 * Node state may have changed during the lock
2222 * changes. Re-check.
2223 */
2224 if (dv->sdev_state == SDEV_INIT) {
2225 sdev_dirdelete(ddv, dv);
2226 rw_exit(&ddv->sdev_contents);
2227 sdev_lookup_failed(ddv, nm, failed_flags);
2228 SDEV_RELE(dv);
2229 *vpp = NULL;
2230 return (ENOENT);
2231 }
2232 }
2233
2234 rw_exit(&ddv->sdev_contents);
2235 SDEV_RELE(dv);
2236
2237 lookup_failed:
2238 sdev_lookup_failed(ddv, nm, failed_flags);
2239 *vpp = NULL;
2240 return (ENOENT);
2241 }
2242
2243 /*
2244 * Given a directory node, mark all nodes beneath as
2245 * STALE, i.e. nodes that don't exist as far as new
2246 * consumers are concerned. Remove them from the
2247 * list of directory entries so that no lookup or
2248 * directory traversal will find them. The node
2249 * not deallocated so existing holds are not affected.
2250 */
2251 void
2252 sdev_stale(struct sdev_node *ddv)
2253 {
2254 struct sdev_node *dv;
2255 struct vnode *vp;
2256
2257 ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2258
2259 rw_enter(&ddv->sdev_contents, RW_WRITER);
2260 while ((dv = SDEV_FIRST_ENTRY(ddv)) != NULL) {
2261 vp = SDEVTOV(dv);
2262 SDEV_HOLD(dv);
2263 if (vp->v_type == VDIR)
2264 sdev_stale(dv);
2265
2266 sdev_dirdelete(ddv, dv);
2267 SDEV_RELE(dv);
2268 }
2269 ddv->sdev_flags |= SDEV_BUILD;
2270 rw_exit(&ddv->sdev_contents);
2271 }
2272
2273 /*
2274 * Given a directory node, clean out all the nodes beneath.
2275 * If expr is specified, clean node with names matching expr.
2276 * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
2277 * so they are excluded from future lookups.
2278 */
2279 int
2280 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
2281 {
2282 int error = 0;
2283 int busy = 0;
2284 struct vnode *vp;
2285 struct sdev_node *dv, *next;
2286 int bkstore = 0;
2287 int len = 0;
2288 char *bks_name = NULL;
2289
2290 ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2291
2292 /*
2293 * We try our best to destroy all unused sdev_node's
2294 */
2295 rw_enter(&ddv->sdev_contents, RW_WRITER);
2296 for (dv = SDEV_FIRST_ENTRY(ddv); dv != NULL; dv = next) {
2297 next = SDEV_NEXT_ENTRY(ddv, dv);
2298 vp = SDEVTOV(dv);
2299
2300 if (expr && gmatch(dv->sdev_name, expr) == 0)
2301 continue;
2302
2303 if (vp->v_type == VDIR &&
2304 sdev_cleandir(dv, NULL, flags) != 0) {
2305 sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2306 dv->sdev_name));
2307 busy++;
2308 continue;
2309 }
2310
2311 if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
2312 sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2313 dv->sdev_name));
2314 busy++;
2315 continue;
2316 }
2317
2318 /*
2319 * at this point, either dv is not held or SDEV_ENFORCE
2320 * is specified. In either case, dv needs to be deleted
2321 */
2322 SDEV_HOLD(dv);
2323
2324 bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
2325 if (bkstore && (vp->v_type == VDIR))
2326 bkstore += 1;
2327
2328 if (bkstore) {
2329 len = strlen(dv->sdev_name) + 1;
2330 bks_name = kmem_alloc(len, KM_SLEEP);
2331 bcopy(dv->sdev_name, bks_name, len);
2332 }
2333
2334 sdev_dirdelete(ddv, dv);
2335
2336 /* take care the backing store clean up */
2337 if (bkstore) {
2338 ASSERT(bks_name);
2339 ASSERT(ddv->sdev_attrvp);
2340
2341 if (bkstore == 1) {
2342 error = VOP_REMOVE(ddv->sdev_attrvp,
2343 bks_name, kcred, NULL, 0);
2344 } else if (bkstore == 2) {
2345 error = VOP_RMDIR(ddv->sdev_attrvp,
2346 bks_name, ddv->sdev_attrvp, kcred, NULL, 0);
2347 }
2348
2349 /* do not propagate the backing store errors */
2350 if (error) {
2351 sdcmn_err9(("sdev_cleandir: backing store"
2352 "not cleaned\n"));
2353 error = 0;
2354 }
2355
2356 bkstore = 0;
2357 kmem_free(bks_name, len);
2358 bks_name = NULL;
2359 len = 0;
2360 }
2361
2362 ddv->sdev_flags |= SDEV_BUILD;
2363 SDEV_RELE(dv);
2364 }
2365
2366 ddv->sdev_flags |= SDEV_BUILD;
2367 rw_exit(&ddv->sdev_contents);
2368
2369 if (busy) {
2370 error = EBUSY;
2371 }
2372
2373 return (error);
2374 }
2375
2376 /*
2377 * a convenient wrapper for readdir() funcs
2378 */
2379 size_t
2380 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
2381 {
2382 size_t reclen = DIRENT64_RECLEN(strlen(nm));
2383 if (reclen > size)
2384 return (0);
2385
2386 de->d_ino = (ino64_t)ino;
2387 de->d_off = (off64_t)off + 1;
2388 de->d_reclen = (ushort_t)reclen;
2389 (void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
2390 return (reclen);
2391 }
2392
2393 /*
2394 * sdev_mount service routines
2395 */
2396 int
2397 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
2398 {
2399 int error;
2400
2401 if (uap->datalen != sizeof (*args))
2402 return (EINVAL);
2403
2404 if (error = copyin(uap->dataptr, args, sizeof (*args))) {
2405 cmn_err(CE_WARN, "sdev_copyin_mountargs: can not"
2406 "get user data. error %d\n", error);
2407 return (EFAULT);
2408 }
2409
2410 return (0);
2411 }
2412
/*
 * nextdp(dp): address of the directory entry that follows dp, found by
 * advancing d_reclen bytes from the start of dp.  Any earlier definition
 * of the macro is discarded first.
 */
#undef	nextdp
#define	nextdp(dp)	\
	((struct dirent64 *)(intptr_t)((char *)(dp) + (dp)->d_reclen))
2418
2419 /*
2420 * readdir helper func
2421 */
2422 int
2423 devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
2424 int flags)
2425 {
2426 struct sdev_node *ddv = VTOSDEV(vp);
2427 struct sdev_node *dv;
2428 dirent64_t *dp;
2429 ulong_t outcount = 0;
2430 size_t namelen;
2431 ulong_t alloc_count;
2432 void *outbuf;
2433 struct iovec *iovp;
2434 int error = 0;
2435 size_t reclen;
2436 offset_t diroff;
2437 offset_t soff;
2438 int this_reclen;
2439 int (*vtor)(struct sdev_node *) = NULL;
2440 struct vattr attr;
2441 timestruc_t now;
2442
2443 ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
2444 ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2445
2446 if (uiop->uio_loffset >= MAXOFF_T) {
2447 if (eofp)
2448 *eofp = 1;
2449 return (0);
2450 }
2451
2452 if (uiop->uio_iovcnt != 1)
2453 return (EINVAL);
2454
2455 if (vp->v_type != VDIR)
2456 return (ENOTDIR);
2457
2458 if (ddv->sdev_flags & SDEV_VTOR) {
2459 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2460 ASSERT(vtor);
2461 }
2462
2463 if (eofp != NULL)
2464 *eofp = 0;
2465
2466 soff = uiop->uio_loffset;
2467 iovp = uiop->uio_iov;
2468 alloc_count = iovp->iov_len;
2469 dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
2470 outcount = 0;
2471
2472 if (ddv->sdev_state == SDEV_ZOMBIE)
2473 goto get_cache;
2474
2475 if (SDEV_IS_GLOBAL(ddv)) {
2476
2477 if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
2478 !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
2479 !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
2480 ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
2481 !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
2482 !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2483 !sdev_reconfig_disable) {
2484 /*
2485 * invoking "devfsadm" to do system device reconfig
2486 */
2487 mutex_enter(&ddv->sdev_lookup_lock);
2488 SDEV_BLOCK_OTHERS(ddv,
2489 (SDEV_READDIR|SDEV_LGWAITING));
2490 mutex_exit(&ddv->sdev_lookup_lock);
2491
2492 sdcmn_err8(("readdir of %s by %s: reconfig\n",
2493 ddv->sdev_path, curproc->p_user.u_comm));
2494 if (sdev_reconfig_verbose) {
2495 cmn_err(CE_CONT,
2496 "?readdir of %s by %s: reconfig\n",
2497 ddv->sdev_path, curproc->p_user.u_comm);
2498 }
2499
2500 sdev_devfsadmd_thread(ddv, NULL, kcred);
2501 } else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
2502 /*
2503 * compensate the "ls" started later than "devfsadm"
2504 */
2505 mutex_enter(&ddv->sdev_lookup_lock);
2506 SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
2507 mutex_exit(&ddv->sdev_lookup_lock);
2508 }
2509
2510 /*
2511 * release the contents lock so that
2512 * the cache may be updated by devfsadmd
2513 */
2514 rw_exit(&ddv->sdev_contents);
2515 mutex_enter(&ddv->sdev_lookup_lock);
2516 if (SDEV_IS_READDIR(ddv))
2517 (void) sdev_wait4lookup(ddv, SDEV_READDIR);
2518 mutex_exit(&ddv->sdev_lookup_lock);
2519 rw_enter(&ddv->sdev_contents, RW_READER);
2520
2521 sdcmn_err4(("readdir of directory %s by %s\n",
2522 ddv->sdev_name, curproc->p_user.u_comm));
2523 if (ddv->sdev_flags & SDEV_BUILD) {
2524 if (SDEV_IS_PERSIST(ddv)) {
2525 error = sdev_filldir_from_store(ddv,
2526 alloc_count, cred);
2527 }
2528 ddv->sdev_flags &= ~SDEV_BUILD;
2529 }
2530 }
2531
2532 get_cache:
2533 /* handle "." and ".." */
2534 diroff = 0;
2535 if (soff == 0) {
2536 /* first time */
2537 this_reclen = DIRENT64_RECLEN(1);
2538 if (alloc_count < this_reclen) {
2539 error = EINVAL;
2540 goto done;
2541 }
2542
2543 dp->d_ino = (ino64_t)ddv->sdev_ino;
2544 dp->d_off = (off64_t)1;
2545 dp->d_reclen = (ushort_t)this_reclen;
2546
2547 (void) strncpy(dp->d_name, ".",
2548 DIRENT64_NAMELEN(this_reclen));
2549 outcount += dp->d_reclen;
2550 dp = nextdp(dp);
2551 }
2552
2553 diroff++;
2554 if (soff <= 1) {
2555 this_reclen = DIRENT64_RECLEN(2);
2556 if (alloc_count < outcount + this_reclen) {
2557 error = EINVAL;
2558 goto done;
2559 }
2560
2561 dp->d_reclen = (ushort_t)this_reclen;
2562 dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
2563 dp->d_off = (off64_t)2;
2564
2565 (void) strncpy(dp->d_name, "..",
2566 DIRENT64_NAMELEN(this_reclen));
2567 outcount += dp->d_reclen;
2568
2569 dp = nextdp(dp);
2570 }
2571
2572
2573 /* gets the cache */
2574 diroff++;
2575 for (dv = SDEV_FIRST_ENTRY(ddv); dv;
2576 dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) {
2577 sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
2578 diroff, soff, dv->sdev_name));
2579
2580 /* bypassing pre-matured nodes */
2581 if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
2582 sdcmn_err3(("sdev_readdir: pre-mature node "
2583 "%s %d\n", dv->sdev_name, dv->sdev_state));
2584 continue;
2585 }
2586
2587 /*
2588 * Check validity of node
2589 * Drop invalid and nodes to be skipped.
2590 * A node the validator indicates as stale needs
2591 * to be returned as presumably the node name itself
2592 * is valid and the node data itself will be refreshed
2593 * on lookup. An application performing a readdir then
2594 * stat on each entry should thus always see consistent
2595 * data. In any case, it is not possible to synchronize
2596 * with dynamic kernel state, and any view we return can
2597 * never be anything more than a snapshot at a point in time.
2598 */
2599 if (vtor) {
2600 switch (vtor(dv)) {
2601 case SDEV_VTOR_VALID:
2602 break;
2603 case SDEV_VTOR_INVALID:
2604 case SDEV_VTOR_SKIP:
2605 continue;
2606 case SDEV_VTOR_STALE:
2607 sdcmn_err3(("sdev_readir: %s stale\n",
2608 dv->sdev_name));
2609 break;
2610 default:
2611 cmn_err(CE_PANIC,
2612 "dev fs: validator failed: %s(%p)\n",
2613 dv->sdev_name, (void *)dv);
2614 break;
2615 /*NOTREACHED*/
2616 }
2617 }
2618
2619 namelen = strlen(dv->sdev_name);
2620 reclen = DIRENT64_RECLEN(namelen);
2621 if (outcount + reclen > alloc_count) {
2622 goto full;
2623 }
2624 dp->d_reclen = (ushort_t)reclen;
2625 dp->d_ino = (ino64_t)dv->sdev_ino;
2626 dp->d_off = (off64_t)diroff + 1;
2627 (void) strncpy(dp->d_name, dv->sdev_name,
2628 DIRENT64_NAMELEN(reclen));
2629 outcount += reclen;
2630 dp = nextdp(dp);
2631 }
2632
2633 full:
2634 sdcmn_err4(("sdev_readdir: moving %lu bytes: "
2635 "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
2636 (void *)dv));
2637
2638 if (outcount)
2639 error = uiomove(outbuf, outcount, UIO_READ, uiop);
2640
2641 if (!error) {
2642 uiop->uio_loffset = diroff;
2643 if (eofp)
2644 *eofp = dv ? 0 : 1;
2645 }
2646
2647
2648 if (ddv->sdev_attrvp) {
2649 gethrestime(&now);
2650 attr.va_ctime = now;
2651 attr.va_atime = now;
2652 attr.va_mask = AT_CTIME|AT_ATIME;
2653
2654 (void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
2655 }
2656 done:
2657 kmem_free(outbuf, alloc_count);
2658 return (error);
2659 }
2660
2661 static int
2662 sdev_modctl_lookup(const char *path, vnode_t **r_vp)
2663 {
2664 vnode_t *vp;
2665 vnode_t *cvp;
2666 struct sdev_node *svp;
2667 char *nm;
2668 struct pathname pn;
2669 int error;
2670 int persisted = 0;
2671
2672 ASSERT(INGLOBALZONE(curproc));
2673
2674 if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
2675 return (error);
2676 nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2677
2678 vp = rootdir;
2679 VN_HOLD(vp);
2680
2681 while (pn_pathleft(&pn)) {
2682 ASSERT(vp->v_type == VDIR || vp->v_type == VLNK);
2683 (void) pn_getcomponent(&pn, nm);
2684
2685 /*
2686 * Deal with the .. special case where we may be
2687 * traversing up across a mount point, to the
2688 * root of this filesystem or global root.
2689 */
2690 if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) {
2691 checkforroot:
2692 if (VN_CMP(vp, rootdir)) {
2693 nm[1] = 0;
2694 } else if (vp->v_flag & VROOT) {
2695 vfs_t *vfsp;
2696 cvp = vp;
2697 vfsp = cvp->v_vfsp;
2698 vfs_rlock_wait(vfsp);
2699 vp = cvp->v_vfsp->vfs_vnodecovered;
2700 if (vp == NULL ||
2701 (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
2702 vfs_unlock(vfsp);
2703 VN_RELE(cvp);
2704 error = EIO;
2705 break;
2706 }
2707 VN_HOLD(vp);
2708 vfs_unlock(vfsp);
2709 VN_RELE(cvp);
2710 cvp = NULL;
2711 goto checkforroot;
2712 }
2713 }
2714
2715 error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL,
2716 NULL, NULL);
2717 if (error) {
2718 VN_RELE(vp);
2719 break;
2720 }
2721
2722 /* traverse mount points encountered on our journey */
2723 if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
2724 VN_RELE(vp);
2725 VN_RELE(cvp);
2726 break;
2727 }
2728
2729 /*
2730 * symbolic link, can be either relative and absolute
2731 */
2732 if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) {
2733 struct pathname linkpath;
2734 pn_alloc(&linkpath);
2735 if (error = pn_getsymlink(cvp, &linkpath, kcred)) {
2736 pn_free(&linkpath);
2737 break;
2738 }
2739 if (pn_pathleft(&linkpath) == 0)
2740 (void) pn_set(&linkpath, ".");
2741 error = pn_insert(&pn, &linkpath, strlen(nm));
2742 pn_free(&linkpath);
2743 if (pn.pn_pathlen == 0) {
2744 VN_RELE(vp);
2745 return (ENOENT);
2746 }
2747 if (pn.pn_path[0] == '/') {
2748 pn_skipslash(&pn);
2749 VN_RELE(vp);
2750 VN_RELE(cvp);
2751 vp = rootdir;
2752 VN_HOLD(vp);
2753 } else {
2754 VN_RELE(cvp);
2755 }
2756 continue;
2757 }
2758
2759 VN_RELE(vp);
2760
2761 /*
2762 * Direct the operation to the persisting filesystem
2763 * underlying /dev. Bail if we encounter a
2764 * non-persistent dev entity here.
2765 */
2766 if (cvp->v_vfsp->vfs_fstype == devtype) {
2767
2768 if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) {
2769 error = ENOENT;
2770 VN_RELE(cvp);
2771 break;
2772 }
2773
2774 if (VTOSDEV(cvp) == NULL) {
2775 error = ENOENT;
2776 VN_RELE(cvp);
2777 break;
2778 }
2779 svp = VTOSDEV(cvp);
2780 if ((vp = svp->sdev_attrvp) == NULL) {
2781 error = ENOENT;
2782 VN_RELE(cvp);
2783 break;
2784 }
2785 persisted = 1;
2786 VN_HOLD(vp);
2787 VN_RELE(cvp);
2788 cvp = vp;
2789 }
2790
2791 vp = cvp;
2792 pn_skipslash(&pn);
2793 }
2794
2795 kmem_free(nm, MAXNAMELEN);
2796 pn_free(&pn);
2797
2798 if (error)
2799 return (error);
2800
2801 /*
2802 * Only return persisted nodes in the filesystem underlying /dev.
2803 */
2804 if (!persisted) {
2805 VN_RELE(vp);
2806 return (ENOENT);
2807 }
2808
2809 *r_vp = vp;
2810 return (0);
2811 }
2812
2813 int
2814 sdev_modctl_readdir(const char *dir, char ***dirlistp,
2815 int *npathsp, int *npathsp_alloc, int checking_empty)
2816 {
2817 char **pathlist = NULL;
2818 char **newlist = NULL;
2819 int npaths = 0;
2820 int npaths_alloc = 0;
2821 dirent64_t *dbuf = NULL;
2822 int n;
2823 char *s;
2824 int error;
2825 vnode_t *vp;
2826 int eof;
2827 struct iovec iov;
2828 struct uio uio;
2829 struct dirent64 *dp;
2830 size_t dlen;
2831 size_t dbuflen;
2832 int ndirents = 64;
2833 char *nm;
2834
2835 error = sdev_modctl_lookup(dir, &vp);
2836 sdcmn_err11(("modctl readdir: %s by %s: %s\n",
2837 dir, curproc->p_user.u_comm,
2838 (error == 0) ? "ok" : "failed"));
2839 if (error)
2840 return (error);
2841
2842 dlen = ndirents * (sizeof (*dbuf));
2843 dbuf = kmem_alloc(dlen, KM_SLEEP);
2844
2845 uio.uio_iov = &iov;
2846 uio.uio_iovcnt = 1;
2847 uio.uio_segflg = UIO_SYSSPACE;
2848 uio.uio_fmode = 0;
2849 uio.uio_extflg = UIO_COPY_CACHED;
2850 uio.uio_loffset = 0;
2851 uio.uio_llimit = MAXOFFSET_T;
2852
2853 eof = 0;
2854 error = 0;
2855 while (!error && !eof) {
2856 uio.uio_resid = dlen;
2857 iov.iov_base = (char *)dbuf;
2858 iov.iov_len = dlen;
2859
2860 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2861 error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
2862 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2863
2864 dbuflen = dlen - uio.uio_resid;
2865
2866 if (error || dbuflen == 0)
2867 break;
2868
2869 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
2870 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
2871
2872 nm = dp->d_name;
2873
2874 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
2875 continue;
2876 if (npaths == npaths_alloc) {
2877 npaths_alloc += 64;
2878 newlist = (char **)
2879 kmem_zalloc((npaths_alloc + 1) *
2880 sizeof (char *), KM_SLEEP);
2881 if (pathlist) {
2882 bcopy(pathlist, newlist,
2883 npaths * sizeof (char *));
2884 kmem_free(pathlist,
2885 (npaths + 1) * sizeof (char *));
2886 }
2887 pathlist = newlist;
2888 }
2889 n = strlen(nm) + 1;
2890 s = kmem_alloc(n, KM_SLEEP);
2891 bcopy(nm, s, n);
2892 pathlist[npaths++] = s;
2893 sdcmn_err11((" %s/%s\n", dir, s));
2894
2895 /* if checking empty, one entry is as good as many */
2896 if (checking_empty) {
2897 eof = 1;
2898 break;
2899 }
2900 }
2901 }
2902
2903 exit:
2904 VN_RELE(vp);
2905
2906 if (dbuf)
2907 kmem_free(dbuf, dlen);
2908
2909 if (error)
2910 return (error);
2911
2912 *dirlistp = pathlist;
2913 *npathsp = npaths;
2914 *npathsp_alloc = npaths_alloc;
2915
2916 return (0);
2917 }
2918
/*
 * Release a name list produced by sdev_modctl_readdir().
 *
 *	pathlist	the list, or NULL when the directory had no names
 *	npaths		number of names stored in the list
 *	npaths_alloc	slot count reported by sdev_modctl_readdir(); the
 *			array itself holds npaths_alloc + 1 pointers
 *
 * sdev_modctl_readdir() hands back a NULL list (with both counts 0) for a
 * directory without entries, so a NULL pathlist is accepted and ignored
 * instead of being passed to kmem_free() with a non-zero size.
 */
void
sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
{
	int i, n;

	if (pathlist == NULL)
		return;

	for (i = 0; i < npaths; i++) {
		/* each name was allocated as strlen + 1 bytes */
		n = strlen(pathlist[i]) + 1;
		kmem_free(pathlist[i], n);
	}

	kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
}
2931
2932 int
2933 sdev_modctl_devexists(const char *path)
2934 {
2935 vnode_t *vp;
2936 int error;
2937
2938 error = sdev_modctl_lookup(path, &vp);
2939 sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
2940 path, curproc->p_user.u_comm,
2941 (error == 0) ? "ok" : "failed"));
2942 if (error == 0)
2943 VN_RELE(vp);
2944
2945 return (error);
2946 }
2947
2948 extern int sdev_vnodeops_tbl_size;
2949
2950 /*
2951 * construct a new template with overrides from vtab
2952 */
2953 static fs_operation_def_t *
2954 sdev_merge_vtab(const fs_operation_def_t tab[])
2955 {
2956 fs_operation_def_t *new;
2957 const fs_operation_def_t *tab_entry;
2958
2959 /* make a copy of standard vnode ops table */
2960 new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
2961 bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);
2962
2963 /* replace the overrides from tab */
2964 for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
2965 fs_operation_def_t *std_entry = new;
2966 while (std_entry->name) {
2967 if (strcmp(tab_entry->name, std_entry->name) == 0) {
2968 std_entry->func = tab_entry->func;
2969 break;
2970 }
2971 std_entry++;
2972 }
2973 if (std_entry->name == NULL)
2974 cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.",
2975 tab_entry->name);
2976 }
2977
2978 return (new);
2979 }
2980
2981 /* free memory allocated by sdev_merge_vtab */
2982 static void
2983 sdev_free_vtab(fs_operation_def_t *new)
2984 {
2985 kmem_free(new, sdev_vnodeops_tbl_size);
2986 }
2987
2988 /*
2989 * a generic setattr() function
2990 *
2991 * note: flags only supports AT_UID and AT_GID.
2992 * Future enhancements can be done for other types, e.g. AT_MODE
2993 */
2994 int
2995 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags,
2996 struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *,
2997 int), int protocol)
2998 {
2999 struct sdev_node *dv = VTOSDEV(vp);
3000 struct sdev_node *parent = dv->sdev_dotdot;
3001 struct vattr *get;
3002 uint_t mask = vap->va_mask;
3003 int error;
3004
3005 /* some sanity checks */
3006 if (vap->va_mask & AT_NOSET)
3007 return (EINVAL);
3008
3009 if (vap->va_mask & AT_SIZE) {
3010 if (vp->v_type == VDIR) {
3011 return (EISDIR);
3012 }
3013 }
3014
3015 /* no need to set attribute, but do not fail either */
3016 ASSERT(parent);
3017 rw_enter(&parent->sdev_contents, RW_READER);
3018 if (dv->sdev_state == SDEV_ZOMBIE) {
3019 rw_exit(&parent->sdev_contents);
3020 return (0);
3021 }
3022
3023 /* If backing store exists, just set it. */
3024 if (dv->sdev_attrvp) {
3025 rw_exit(&parent->sdev_contents);
3026 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3027 }
3028
3029 /*
3030 * Otherwise, for nodes with the persistence attribute, create it.
3031 */
3032 ASSERT(dv->sdev_attr);
3033 if (SDEV_IS_PERSIST(dv) ||
3034 ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
3035 sdev_vattr_merge(dv, vap);
3036 rw_enter(&dv->sdev_contents, RW_WRITER);
3037 error = sdev_shadow_node(dv, cred);
3038 rw_exit(&dv->sdev_contents);
3039 rw_exit(&parent->sdev_contents);
3040
3041 if (error)
3042 return (error);
3043 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3044 }
3045
3046
3047 /*
3048 * sdev_attr was allocated in sdev_mknode
3049 */
3050 rw_enter(&dv->sdev_contents, RW_WRITER);
3051 error = secpolicy_vnode_setattr(cred, vp, vap,
3052 dv->sdev_attr, flags, sdev_unlocked_access, dv);
3053 if (error) {
3054 rw_exit(&dv->sdev_contents);
3055 rw_exit(&parent->sdev_contents);
3056 return (error);
3057 }
3058
3059 get = dv->sdev_attr;
3060 if (mask & AT_MODE) {
3061 get->va_mode &= S_IFMT;
3062 get->va_mode |= vap->va_mode & ~S_IFMT;
3063 }
3064
3065 if ((mask & AT_UID) || (mask & AT_GID)) {
3066 if (mask & AT_UID)
3067 get->va_uid = vap->va_uid;
3068 if (mask & AT_GID)
3069 get->va_gid = vap->va_gid;
3070 /*
3071 * a callback must be provided if the protocol is set
3072 */
3073 if ((protocol & AT_UID) || (protocol & AT_GID)) {
3074 ASSERT(callback);
3075 error = callback(dv, get, protocol);
3076 if (error) {
3077 rw_exit(&dv->sdev_contents);
3078 rw_exit(&parent->sdev_contents);
3079 return (error);
3080 }
3081 }
3082 }
3083
3084 if (mask & AT_ATIME)
3085 get->va_atime = vap->va_atime;
3086 if (mask & AT_MTIME)
3087 get->va_mtime = vap->va_mtime;
3088 if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
3089 gethrestime(&get->va_ctime);
3090 }
3091
3092 sdev_vattr_merge(dv, get);
3093 rw_exit(&dv->sdev_contents);
3094 rw_exit(&parent->sdev_contents);
3095 return (0);
3096 }
3097
3098 /*
3099 * a generic inactive() function
3100 */
3101 /*ARGSUSED*/
3102 void
3103 devname_inactive_func(struct vnode *vp, struct cred *cred,
3104 void (*callback)(struct vnode *))
3105 {
3106 int clean;
3107 struct sdev_node *dv = VTOSDEV(vp);
3108 int state;
3109
3110 mutex_enter(&vp->v_lock);
3111 ASSERT(vp->v_count >= 1);
3112
3113
3114 if (vp->v_count == 1 && callback != NULL)
3115 callback(vp);
3116
3117 rw_enter(&dv->sdev_contents, RW_WRITER);
3118 state = dv->sdev_state;
3119
3120 clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE);
3121
3122 /*
3123 * sdev is a rather bad public citizen. It violates the general
3124 * agreement that in memory nodes should always have a valid reference
3125 * count on their vnode. But that's not the case here. This means that
3126 * we do actually have to distinguish between getting inactive callbacks
3127 * for zombies and otherwise. This should probably be fixed.
3128 */
3129 if (clean) {
3130 /* Remove the . entry to ourselves */
3131 if (vp->v_type == VDIR) {
3132 decr_link(dv);
3133 }
3134 VERIFY(dv->sdev_nlink == 1);
3135 decr_link(dv);
3136 --vp->v_count;
3137 rw_exit(&dv->sdev_contents);
3138 mutex_exit(&vp->v_lock);
3139 sdev_nodedestroy(dv, 0);
3140 } else {
3141 --vp->v_count;
3142 rw_exit(&dv->sdev_contents);
3143 mutex_exit(&vp->v_lock);
3144 }
3145 }