1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
24 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
25 * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
26 */
27
28 /*
29 * ZFS control directory (a.k.a. ".zfs")
30 *
31 * This directory provides a common location for all ZFS meta-objects.
32 * Currently, this is only the 'snapshot' directory, but this may expand in the
33 * future. The elements are built using the GFS primitives, as the hierarchy
34 * does not actually exist on disk.
35 *
36 * For 'snapshot', we don't want to have all snapshots always mounted, because
37 * this would take up a huge amount of space in /etc/mnttab. We have three
38 * types of objects:
39 *
40 * ctldir ------> snapshotdir -------> snapshot
41 * |
42 * |
43 * V
44 * mounted fs
45 *
 * The 'snapshot' node contains just enough information to look up '..' and
 * act as a mountpoint for the snapshot. Whenever we look up a specific
 * snapshot, we perform an automount of the underlying filesystem and return
 * the corresponding vnode.
 *
 * All mounts are handled automatically by the kernel, but unmounts are
 * (currently) handled from userland. The main reason is that there is no
 * reliable way to auto-unmount the filesystem when it's "no longer in use".
 * When the user unmounts a filesystem, we call zfsctl_umount_snapshots(),
 * which unmounts any snapshots within the snapshot directory.
 *
 * The '.zfs', '.zfs/snapshot', and all directories created under
 * '.zfs/snapshot' (i.e., '.zfs/snapshot/<snapname>') are all GFS nodes and
 * share the same vfs_t as the head filesystem (what '.zfs' lives under).
 *
 * File systems mounted on top of the GFS nodes '.zfs/snapshot/<snapname>'
 * (i.e., snapshots) are ZFS nodes and have their own unique vfs_t.
 * However, vnodes within these mounted filesystems have their v_vfsp
 * fields set to the head filesystem to make NFS happy (see
 * zfsctl_snapdir_lookup()). We VFS_HOLD the head filesystem's vfs_t
 * so that it cannot be freed until all snapshots have been unmounted.
67 */
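
/*
 * Illustrative example (names and paths are hypothetical): for a
 * filesystem mounted at /tank/fs, a simple
 *
 *	ls /tank/fs/.zfs/snapshot/monday
 *
 * ends up in zfsctl_snapdir_lookup(), which automounts tank/fs@monday on
 * the corresponding GFS node and returns the snapshot's root vnode.
 */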
68
69 #include <fs/fs_subr.h>
70 #include <sys/zfs_ctldir.h>
71 #include <sys/zfs_ioctl.h>
72 #include <sys/zfs_vfsops.h>
73 #include <sys/vfs_opreg.h>
74 #include <sys/gfs.h>
75 #include <sys/stat.h>
76 #include <sys/dmu.h>
77 #include <sys/dsl_destroy.h>
78 #include <sys/dsl_deleg.h>
79 #include <sys/mount.h>
80 #include <sys/sunddi.h>
81 #include <sys/autosnap.h>
82
83 #include "zfs_namecheck.h"
84
85 typedef struct zfsctl_node {
86 gfs_dir_t zc_gfs_private;
87 uint64_t zc_id;
88 timestruc_t zc_cmtime; /* ctime and mtime, always the same */
89 } zfsctl_node_t;
90
91 typedef struct zfsctl_snapdir {
92 zfsctl_node_t sd_node;
93 kmutex_t sd_lock;
94 avl_tree_t sd_snaps;
95 } zfsctl_snapdir_t;
96
97 typedef struct {
98 char *se_name;
99 vnode_t *se_root;
100 avl_node_t se_node;
101 } zfs_snapentry_t;
102
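/*
 * AVL comparator for the per-snapdir tree of mounted snapshots: orders
 * entries by snapshot name and clamps the strcmp() result to -1/0/1 as
 * the AVL code expects.
 */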
103 static int
104 snapentry_compare(const void *a, const void *b)
105 {
106 const zfs_snapentry_t *sa = a;
107 const zfs_snapentry_t *sb = b;
108 int ret = strcmp(sa->se_name, sb->se_name);
109
110 if (ret < 0)
111 return (-1);
112 else if (ret > 0)
113 return (1);
114 else
115 return (0);
116 }
117
118 vnodeops_t *zfsctl_ops_root;
119 vnodeops_t *zfsctl_ops_snapdir;
120 vnodeops_t *zfsctl_ops_snapshot;
121 vnodeops_t *zfsctl_ops_shares;
122
123 static const fs_operation_def_t zfsctl_tops_root[];
124 static const fs_operation_def_t zfsctl_tops_snapdir[];
125 static const fs_operation_def_t zfsctl_tops_snapshot[];
126 static const fs_operation_def_t zfsctl_tops_shares[];
127
128 static vnode_t *zfsctl_mknode_snapdir(vnode_t *);
129 static vnode_t *zfsctl_mknode_shares(vnode_t *);
130 static vnode_t *zfsctl_snapshot_mknode(vnode_t *, uint64_t objset);
131 static int zfsctl_unmount_snap(zfs_snapentry_t *, int, cred_t *);
132
133 static gfs_opsvec_t zfsctl_opsvec[] = {
134 { ".zfs", zfsctl_tops_root, &zfsctl_ops_root },
135 { ".zfs/snapshot", zfsctl_tops_snapdir, &zfsctl_ops_snapdir },
136 { ".zfs/snapshot/vnode", zfsctl_tops_snapshot, &zfsctl_ops_snapshot },
137 { ".zfs/shares", zfsctl_tops_shares, &zfsctl_ops_shares },
138 { NULL }
139 };
140
141 /*
 * Root directory elements. We have only two entries:
 * snapshot and shares.
144 */
145 static gfs_dirent_t zfsctl_root_entries[] = {
146 { "snapshot", zfsctl_mknode_snapdir, GFS_CACHE_VNODE },
147 { "shares", zfsctl_mknode_shares, GFS_CACHE_VNODE },
148 { NULL }
149 };
150
151 /* include . and .. in the calculation */
152 #define NROOT_ENTRIES ((sizeof (zfsctl_root_entries) / \
153 sizeof (gfs_dirent_t)) + 1)
154
155
156 /*
157 * Initialize the various GFS pieces we'll need to create and manipulate .zfs
158 * directories. This is called from the ZFS init routine, and initializes the
159 * vnode ops vectors that we'll be using.
160 */
161 void
162 zfsctl_init(void)
163 {
164 VERIFY(gfs_make_opsvec(zfsctl_opsvec) == 0);
165 }
166
167 void
168 zfsctl_fini(void)
169 {
170 /*
	 * Remove zfsctl vnode ops
172 */
173 if (zfsctl_ops_root)
174 vn_freevnodeops(zfsctl_ops_root);
175 if (zfsctl_ops_snapdir)
176 vn_freevnodeops(zfsctl_ops_snapdir);
177 if (zfsctl_ops_snapshot)
178 vn_freevnodeops(zfsctl_ops_snapshot);
179 if (zfsctl_ops_shares)
180 vn_freevnodeops(zfsctl_ops_shares);
181
182 zfsctl_ops_root = NULL;
183 zfsctl_ops_snapdir = NULL;
184 zfsctl_ops_snapshot = NULL;
185 zfsctl_ops_shares = NULL;
186 }
187
188 boolean_t
189 zfsctl_is_node(vnode_t *vp)
190 {
191 return (vn_matchops(vp, zfsctl_ops_root) ||
192 vn_matchops(vp, zfsctl_ops_snapdir) ||
193 vn_matchops(vp, zfsctl_ops_snapshot) ||
	    vn_matchops(vp, zfsctl_ops_shares));
}
197
198 /*
199 * Return the inode number associated with the 'snapshot' or
200 * 'shares' directory.
201 */
202 /* ARGSUSED */
203 static ino64_t
204 zfsctl_root_inode_cb(vnode_t *vp, int index)
205 {
206 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
207
208 ASSERT(index < 2);
209
210 if (index == 0)
211 return (ZFSCTL_INO_SNAPDIR);
212
213 return (zfsvfs->z_shares_dir);
214 }
215
216 /*
217 * Create the '.zfs' directory. This directory is cached as part of the VFS
218 * structure. This results in a hold on the vfs_t. The code in zfs_umount()
219 * therefore checks against a vfs_count of 2 instead of 1. This reference
220 * is removed when the ctldir is destroyed in the unmount.
221 */
222 void
223 zfsctl_create(zfsvfs_t *zfsvfs)
224 {
225 vnode_t *vp, *rvp;
226 zfsctl_node_t *zcp;
227 uint64_t crtime[2];
228
229 ASSERT(zfsvfs->z_ctldir == NULL);
230
231 vp = gfs_root_create(sizeof (zfsctl_node_t), zfsvfs->z_vfs,
232 zfsctl_ops_root, ZFSCTL_INO_ROOT, zfsctl_root_entries,
233 zfsctl_root_inode_cb, MAXNAMELEN, NULL, NULL);
234 zcp = vp->v_data;
235 zcp->zc_id = ZFSCTL_INO_ROOT;
236
237 VERIFY(VFS_ROOT(zfsvfs->z_vfs, &rvp) == 0);
238 VERIFY(0 == sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
239 &crtime, sizeof (crtime)));
240 ZFS_TIME_DECODE(&zcp->zc_cmtime, crtime);
241 VN_RELE(rvp);
242
243 /*
244 * We're only faking the fact that we have a root of a filesystem for
245 * the sake of the GFS interfaces. Undo the flag manipulation it did
246 * for us.
247 */
248 vp->v_flag &= ~(VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT);
249
250 zfsvfs->z_ctldir = vp;
251 }
252
253 /*
254 * Destroy the '.zfs' directory. Only called when the filesystem is unmounted.
255 * There might still be more references if we were force unmounted, but only
 * new zfs_inactive() calls can occur and they don't reference .zfs.
257 */
258 void
259 zfsctl_destroy(zfsvfs_t *zfsvfs)
260 {
261 VN_RELE(zfsvfs->z_ctldir);
262 zfsvfs->z_ctldir = NULL;
263 }
264
265 /*
266 * Given a root znode, retrieve the associated .zfs directory.
267 * Add a hold to the vnode and return it.
268 */
269 vnode_t *
270 zfsctl_root(znode_t *zp)
271 {
272 ASSERT(zfs_has_ctldir(zp));
273 VN_HOLD(zp->z_zfsvfs->z_ctldir);
274 return (zp->z_zfsvfs->z_ctldir);
275 }
276
277 /*
278 * Common open routine. Disallow any write access.
279 */
280 /* ARGSUSED */
281 static int
282 zfsctl_common_open(vnode_t **vpp, int flags, cred_t *cr, caller_context_t *ct)
283 {
284 if (flags & FWRITE)
285 return (SET_ERROR(EACCES));
286
287 return (0);
288 }
289
290 /*
291 * Common close routine. Nothing to do here.
292 */
293 /* ARGSUSED */
294 static int
295 zfsctl_common_close(vnode_t *vpp, int flags, int count, offset_t off,
296 cred_t *cr, caller_context_t *ct)
297 {
298 return (0);
299 }
300
301 /*
302 * Common access routine. Disallow writes.
303 */
304 /* ARGSUSED */
305 static int
306 zfsctl_common_access(vnode_t *vp, int mode, int flags, cred_t *cr,
307 caller_context_t *ct)
308 {
309 if (flags & V_ACE_MASK) {
310 if (mode & ACE_ALL_WRITE_PERMS)
311 return (SET_ERROR(EACCES));
312 } else {
313 if (mode & VWRITE)
314 return (SET_ERROR(EACCES));
315 }
316
317 return (0);
318 }
319
320 /*
321 * Common getattr function. Fill in basic information.
322 */
323 static void
324 zfsctl_common_getattr(vnode_t *vp, vattr_t *vap)
325 {
326 timestruc_t now;
327
328 vap->va_uid = 0;
329 vap->va_gid = 0;
330 vap->va_rdev = 0;
331 /*
332 * We are a purely virtual object, so we have no
333 * blocksize or allocated blocks.
334 */
335 vap->va_blksize = 0;
336 vap->va_nblocks = 0;
337 vap->va_seq = 0;
338 vap->va_fsid = vp->v_vfsp->vfs_dev;
339 vap->va_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP |
340 S_IROTH | S_IXOTH;
341 vap->va_type = VDIR;
342 /*
343 * We live in the now (for atime).
344 */
345 gethrestime(&now);
346 vap->va_atime = now;
347 }
348
349 /*ARGSUSED*/
350 static int
351 zfsctl_common_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
352 {
353 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
354 zfsctl_node_t *zcp = vp->v_data;
355 uint64_t object = zcp->zc_id;
356 zfid_short_t *zfid;
357 int i;
358
359 ZFS_ENTER(zfsvfs);
360
361 if (fidp->fid_len < SHORT_FID_LEN) {
362 fidp->fid_len = SHORT_FID_LEN;
363 ZFS_EXIT(zfsvfs);
364 return (SET_ERROR(ENOSPC));
365 }
366
367 zfid = (zfid_short_t *)fidp;
368
369 zfid->zf_len = SHORT_FID_LEN;
370
371 for (i = 0; i < sizeof (zfid->zf_object); i++)
372 zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
373
374 /* .zfs znodes always have a generation number of 0 */
375 for (i = 0; i < sizeof (zfid->zf_gen); i++)
376 zfid->zf_gen[i] = 0;
377
378 ZFS_EXIT(zfsvfs);
379 return (0);
380 }
381
382
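/*
 * FID callback for '.zfs/shares': delegate to the real shares directory
 * znode so the file handle returned is the one for the underlying
 * directory rather than for the control directory node.
 */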
383 /*ARGSUSED*/
384 static int
385 zfsctl_shares_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
386 {
387 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
388 znode_t *dzp;
389 int error;
390
391 ZFS_ENTER(zfsvfs);
392
393 if (zfsvfs->z_shares_dir == 0) {
394 ZFS_EXIT(zfsvfs);
395 return (SET_ERROR(ENOTSUP));
396 }
397
398 if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
399 error = VOP_FID(ZTOV(dzp), fidp, ct);
400 VN_RELE(ZTOV(dzp));
401 }
402
403 ZFS_EXIT(zfsvfs);
404 return (error);
}

/*
407 * .zfs inode namespace
408 *
409 * We need to generate unique inode numbers for all files and directories
410 * within the .zfs pseudo-filesystem. We use the following scheme:
411 *
412 * ENTRY ZFSCTL_INODE
413 * .zfs 1
414 * .zfs/snapshot 2
415 * .zfs/snapshot/<snap> objectid(snap)
416 */
417
418 #define ZFSCTL_INO_SNAP(id) (id)
419
420 /*
421 * Get root directory attributes.
422 */
423 /* ARGSUSED */
424 static int
425 zfsctl_root_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
426 caller_context_t *ct)
427 {
428 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
429 zfsctl_node_t *zcp = vp->v_data;
430
431 ZFS_ENTER(zfsvfs);
432 vap->va_nodeid = ZFSCTL_INO_ROOT;
433 vap->va_nlink = vap->va_size = NROOT_ENTRIES;
434 vap->va_mtime = vap->va_ctime = zcp->zc_cmtime;
435
436 zfsctl_common_getattr(vp, vap);
437 ZFS_EXIT(zfsvfs);
438
439 return (0);
440 }
441
442 /*
443 * Special case the handling of "..".
444 */
445 /* ARGSUSED */
446 int
447 zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
448 int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
449 int *direntflags, pathname_t *realpnp)
450 {
451 zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
452 int err;
453
454 /*
455 * No extended attributes allowed under .zfs
456 */
457 if (flags & LOOKUP_XATTR)
458 return (SET_ERROR(EINVAL));
459
460 ZFS_ENTER(zfsvfs);
461
462 if (strcmp(nm, "..") == 0) {
463 err = VFS_ROOT(dvp->v_vfsp, vpp);
464 } else {
465 err = gfs_vop_lookup(dvp, nm, vpp, pnp, flags, rdir,
466 cr, ct, direntflags, realpnp);
467 }
468
469 ZFS_EXIT(zfsvfs);
470
471 return (err);
472 }
473
474 static int
475 zfsctl_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
476 caller_context_t *ct)
477 {
478 /*
479 * We only care about ACL_ENABLED so that libsec can
	 * display ACLs correctly and not default to POSIX draft.
481 */
482 if (cmd == _PC_ACL_ENABLED) {
483 *valp = _ACL_ACE_ENABLED;
484 return (0);
485 }
486
487 return (fs_pathconf(vp, cmd, valp, cr, ct));
488 }
489
490 static const fs_operation_def_t zfsctl_tops_root[] = {
491 { VOPNAME_OPEN, { .vop_open = zfsctl_common_open } },
492 { VOPNAME_CLOSE, { .vop_close = zfsctl_common_close } },
493 { VOPNAME_IOCTL, { .error = fs_inval } },
494 { VOPNAME_GETATTR, { .vop_getattr = zfsctl_root_getattr } },
495 { VOPNAME_ACCESS, { .vop_access = zfsctl_common_access } },
496 { VOPNAME_READDIR, { .vop_readdir = gfs_vop_readdir } },
497 { VOPNAME_LOOKUP, { .vop_lookup = zfsctl_root_lookup } },
498 { VOPNAME_SEEK, { .vop_seek = fs_seek } },
499 { VOPNAME_INACTIVE, { .vop_inactive = gfs_vop_inactive } },
500 { VOPNAME_PATHCONF, { .vop_pathconf = zfsctl_pathconf } },
501 { VOPNAME_FID, { .vop_fid = zfsctl_common_fid } },
502 { NULL }
503 };
504
505 /*
 * Gets the full dataset name that corresponds to the given snapshot name.
507 * Example:
508 * zfsctl_snapshot_zname("snap1") -> "mypool/myfs@snap1"
509 */
510 static int
511 zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname)
512 {
513 objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;
514
515 if (zfs_component_namecheck(name, NULL, NULL) != 0)
516 return (SET_ERROR(EILSEQ));
517 dmu_objset_name(os, zname);
518 if (strlen(zname) + 1 + strlen(name) >= len)
519 return (SET_ERROR(ENAMETOOLONG));
520 (void) strcat(zname, "@");
521 (void) strcat(zname, name);
522 return (0);
523 }
524
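/*
 * Unmount the snapshot filesystem mounted on 'sep's root vnode and free
 * the entry. Callers hold sd_lock and have already removed 'sep' from the
 * sd_snaps AVL tree (re-adding it if the unmount fails).
 */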
525 static int
526 zfsctl_unmount_snap(zfs_snapentry_t *sep, int fflags, cred_t *cr)
527 {
528 vnode_t *svp = sep->se_root;
529 int error;
530
531 ASSERT(vn_ismntpt(svp));
532
533 /* this will be dropped by dounmount() */
534 if ((error = vn_vfswlock(svp)) != 0)
535 return (error);
536
537 VN_HOLD(svp);
538 error = dounmount(vn_mountedvfs(svp), fflags, cr);
539 if (error) {
540 VN_RELE(svp);
541 return (error);
542 }
543
544 /*
545 * We can't use VN_RELE(), as that will try to invoke
546 * zfsctl_snapdir_inactive(), which would cause us to destroy
547 * the sd_lock mutex held by our caller.
548 */
549 ASSERT(svp->v_count == 1);
550 gfs_vop_inactive(svp, cr, NULL);
551
552 kmem_free(sep->se_name, strlen(sep->se_name) + 1);
553 kmem_free(sep, sizeof (zfs_snapentry_t));
554
555 return (0);
556 }
557
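/*
 * Rename a mounted snapshot entry in place: re-key it in the sd_snaps AVL
 * tree and rewrite the tails of the vfs mountpoint and resource strings to
 * reflect the new snapshot name. Called with sd_lock held.
 */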
558 static void
559 zfsctl_rename_snap(zfsctl_snapdir_t *sdp, zfs_snapentry_t *sep, const char *nm)
560 {
561 avl_index_t where;
562 vfs_t *vfsp;
563 refstr_t *pathref;
564 char newpath[MAXNAMELEN];
565 char *tail;
566
567 ASSERT(MUTEX_HELD(&sdp->sd_lock));
568 ASSERT(sep != NULL);
569
570 vfsp = vn_mountedvfs(sep->se_root);
571 ASSERT(vfsp != NULL);
572
573 vfs_lock_wait(vfsp);
574
575 /*
576 * Change the name in the AVL tree.
577 */
578 avl_remove(&sdp->sd_snaps, sep);
579 kmem_free(sep->se_name, strlen(sep->se_name) + 1);
580 sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
581 (void) strcpy(sep->se_name, nm);
582 VERIFY(avl_find(&sdp->sd_snaps, sep, &where) == NULL);
583 avl_insert(&sdp->sd_snaps, sep, where);
584
585 /*
586 * Change the current mountpoint info:
587 * - update the tail of the mntpoint path
588 * - update the tail of the resource path
589 */
590 pathref = vfs_getmntpoint(vfsp);
591 (void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
592 VERIFY((tail = strrchr(newpath, '/')) != NULL);
593 *(tail+1) = '\0';
594 ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
595 (void) strcat(newpath, nm);
596 refstr_rele(pathref);
597 vfs_setmntpoint(vfsp, newpath, 0);
598
599 pathref = vfs_getresource(vfsp);
600 (void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
601 VERIFY((tail = strrchr(newpath, '@')) != NULL);
602 *(tail+1) = '\0';
603 ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
604 (void) strcat(newpath, nm);
605 refstr_rele(pathref);
606 vfs_setresource(vfsp, newpath, 0);
607
608 vfs_unlock(vfsp);
609 }
610
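/*
 * VOP_RENAME for '.zfs/snapshot'. Renaming a snapshot directory renames the
 * snapshot itself via dsl_dataset_rename_snapshot(); moves out of the
 * snapdir are rejected, and the cached mount state is fixed up by
 * zfsctl_rename_snap().
 */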
611 /*ARGSUSED*/
612 static int
613 zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
614 cred_t *cr, caller_context_t *ct, int flags)
615 {
616 zfsctl_snapdir_t *sdp = sdvp->v_data;
617 zfs_snapentry_t search, *sep;
618 zfsvfs_t *zfsvfs;
619 avl_index_t where;
620 char from[ZFS_MAX_DATASET_NAME_LEN], to[ZFS_MAX_DATASET_NAME_LEN];
621 char real[ZFS_MAX_DATASET_NAME_LEN], fsname[ZFS_MAX_DATASET_NAME_LEN];
622 int err;
623
624 zfsvfs = sdvp->v_vfsp->vfs_data;
625 ZFS_ENTER(zfsvfs);
626
627 if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
628 err = dmu_snapshot_realname(zfsvfs->z_os, snm, real,
629 sizeof (real), NULL);
630 if (err == 0) {
631 snm = real;
632 } else if (err != ENOTSUP) {
633 ZFS_EXIT(zfsvfs);
634 return (err);
635 }
636 }
637
638 ZFS_EXIT(zfsvfs);
639
640 dmu_objset_name(zfsvfs->z_os, fsname);
641
642 err = zfsctl_snapshot_zname(sdvp, snm, sizeof (from), from);
643 if (err == 0)
644 err = zfsctl_snapshot_zname(tdvp, tnm, sizeof (to), to);
645 if (err == 0)
646 err = zfs_secpolicy_rename_perms(from, to, cr);
647 if (err != 0)
648 return (err);
649
650 /*
651 * Cannot move snapshots out of the snapdir.
652 */
653 if (sdvp != tdvp)
654 return (SET_ERROR(EINVAL));
655
656 if (strcmp(snm, tnm) == 0)
657 return (0);
658
659 mutex_enter(&sdp->sd_lock);
660
661 search.se_name = (char *)snm;
662 if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) == NULL) {
663 mutex_exit(&sdp->sd_lock);
664 return (SET_ERROR(ENOENT));
665 }
666
667 err = dsl_dataset_rename_snapshot(fsname, snm, tnm, B_FALSE);
668 if (err == 0)
669 zfsctl_rename_snap(sdp, sep, tnm);
670
671 mutex_exit(&sdp->sd_lock);
672
673 return (err);
674 }
675
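/*
 * VOP_RMDIR for '.zfs/snapshot'. Removing a snapshot directory
 * force-unmounts the snapshot and then destroys it; the entry must already
 * be present in the sd_snaps tree (i.e., the snapshot has been looked up
 * and mounted), otherwise ENOENT is returned.
 */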
676 /* ARGSUSED */
677 static int
678 zfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
679 caller_context_t *ct, int flags)
680 {
681 zfsctl_snapdir_t *sdp = dvp->v_data;
682 zfs_snapentry_t *sep;
683 zfs_snapentry_t search;
684 zfsvfs_t *zfsvfs;
685 char snapname[ZFS_MAX_DATASET_NAME_LEN];
686 char real[ZFS_MAX_DATASET_NAME_LEN];
687 int err;
688
689 zfsvfs = dvp->v_vfsp->vfs_data;
690 ZFS_ENTER(zfsvfs);
691
	if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
694 err = dmu_snapshot_realname(zfsvfs->z_os, name, real,
695 sizeof (real), NULL);
696 if (err == 0) {
697 name = real;
698 } else if (err != ENOTSUP) {
699 ZFS_EXIT(zfsvfs);
700 return (err);
701 }
702 }
703
704 ZFS_EXIT(zfsvfs);
705
706 err = zfsctl_snapshot_zname(dvp, name, sizeof (snapname), snapname);
707 if (err == 0)
708 err = zfs_secpolicy_destroy_perms(snapname, cr);
709 if (err != 0)
710 return (err);
711
712 mutex_enter(&sdp->sd_lock);
713
714 search.se_name = name;
715 sep = avl_find(&sdp->sd_snaps, &search, NULL);
716 if (sep) {
717 avl_remove(&sdp->sd_snaps, sep);
718 err = zfsctl_unmount_snap(sep, MS_FORCE, cr);
719 if (err != 0)
720 avl_add(&sdp->sd_snaps, sep);
721 else
722 err = dsl_destroy_snapshot(snapname, B_FALSE);
723 } else {
724 err = SET_ERROR(ENOENT);
725 }
726
727 mutex_exit(&sdp->sd_lock);
728
729 return (err);
730 }
731
732 /*
733 * This creates a snapshot under '.zfs/snapshot'.
734 */
735 /* ARGSUSED */
736 static int
737 zfsctl_snapdir_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp,
738 cred_t *cr, caller_context_t *cc, int flags, vsecattr_t *vsecp)
739 {
740 zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
741 char name[ZFS_MAX_DATASET_NAME_LEN];
742 int err;
743 static enum symfollow follow = NO_FOLLOW;
744 static enum uio_seg seg = UIO_SYSSPACE;
745
746 if (zfs_component_namecheck(dirname, NULL, NULL) != 0 ||
747 autosnap_check_name(dirname))
748 return (SET_ERROR(EILSEQ));
749
750 dmu_objset_name(zfsvfs->z_os, name);
751
752 *vpp = NULL;
753
	err = zfs_secpolicy_snapshot_perms(name, cr);
	if (err != 0)
		return (err);

	err = dmu_objset_snapshot_one(name, dirname);
	if (err != 0)
		return (err);

	err = lookupnameat(dirname, seg, follow, NULL, vpp, dvp);

	return (err);
766 }
767
768 /*
769 * Lookup entry point for the 'snapshot' directory. Try to open the
 * snapshot if it exists, creating the pseudo filesystem vnode as necessary.
771 * Perform a mount of the associated dataset on top of the vnode.
772 */
773 /* ARGSUSED */
774 static int
775 zfsctl_snapdir_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
776 int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
777 int *direntflags, pathname_t *realpnp)
778 {
779 zfsctl_snapdir_t *sdp = dvp->v_data;
780 objset_t *snap;
781 char snapname[ZFS_MAX_DATASET_NAME_LEN];
782 char real[ZFS_MAX_DATASET_NAME_LEN];
783 char *mountpoint;
784 zfs_snapentry_t *sep, search;
785 struct mounta margs;
786 vfs_t *vfsp;
787 size_t mountpoint_len;
788 avl_index_t where;
789 zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
790 int err;
791
792 /*
793 * No extended attributes allowed under .zfs
794 */
795 if (flags & LOOKUP_XATTR)
796 return (SET_ERROR(EINVAL));
797
798 ASSERT(dvp->v_type == VDIR);
799
800 /*
801 * If we get a recursive call, that means we got called
802 * from the domount() code while it was trying to look up the
803 * spec (which looks like a local path for zfs). We need to
804 * add some flag to domount() to tell it not to do this lookup.
805 */
806 if (MUTEX_HELD(&sdp->sd_lock))
807 return (SET_ERROR(ENOENT));
808
809 ZFS_ENTER(zfsvfs);
810
811 if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
812 ZFS_EXIT(zfsvfs);
813 return (0);
814 }
815
816 if (flags & FIGNORECASE) {
817 boolean_t conflict = B_FALSE;
818
819 err = dmu_snapshot_realname(zfsvfs->z_os, nm, real,
820 sizeof (real), &conflict);
821 if (err == 0) {
822 nm = real;
823 } else if (err != ENOTSUP) {
824 ZFS_EXIT(zfsvfs);
825 return (err);
826 }
827 if (realpnp)
828 (void) strlcpy(realpnp->pn_buf, nm,
829 realpnp->pn_bufsize);
830 if (conflict && direntflags)
831 *direntflags = ED_CASE_CONFLICT;
832 }
833
834 mutex_enter(&sdp->sd_lock);
835 search.se_name = (char *)nm;
836 if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) {
837 *vpp = sep->se_root;
838 VN_HOLD(*vpp);
839 err = traverse(vpp);
840 if (err != 0) {
841 VN_RELE(*vpp);
842 *vpp = NULL;
843 } else if (*vpp == sep->se_root) {
844 /*
845 * The snapshot was unmounted behind our backs,
846 * try to remount it.
847 */
848 goto domount;
849 } else {
850 /*
851 * VROOT was set during the traverse call. We need
852 * to clear it since we're pretending to be part
853 * of our parent's vfs.
854 */
855 (*vpp)->v_flag &= ~VROOT;
856 }
857 mutex_exit(&sdp->sd_lock);
858 ZFS_EXIT(zfsvfs);
859 return (err);
860 }
861
862 /*
863 * The requested snapshot is not currently mounted, look it up.
864 */
865 err = zfsctl_snapshot_zname(dvp, nm, sizeof (snapname), snapname);
866 if (err != 0) {
867 mutex_exit(&sdp->sd_lock);
868 ZFS_EXIT(zfsvfs);
		/*
		 * Handle "ls *" or "ls ?" gracefully by mapping EILSEQ to
		 * ENOENT, since the shell ultimately passes the literal "*"
		 * or "?" as the name to look up when it matches nothing.
		 */
874 return (err == EILSEQ ? ENOENT : err);
875 }
876
877 if (autosnap_check_name(strchr(snapname, '@'))) {
878 mutex_exit(&sdp->sd_lock);
879 ZFS_EXIT(zfsvfs);
880 return (SET_ERROR(ENOENT));
881 }
882
883 if (dmu_objset_hold(snapname, FTAG, &snap) != 0) {
884 mutex_exit(&sdp->sd_lock);
885 ZFS_EXIT(zfsvfs);
886 return (SET_ERROR(ENOENT));
887 }
888
889 sep = kmem_alloc(sizeof (zfs_snapentry_t), KM_SLEEP);
890 sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
891 (void) strcpy(sep->se_name, nm);
892 *vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap));
893 avl_insert(&sdp->sd_snaps, sep, where);
894
895 dmu_objset_rele(snap, FTAG);
896 domount:
897 mountpoint_len = strlen(refstr_value(dvp->v_vfsp->vfs_mntpt)) +
898 strlen("/.zfs/snapshot/") + strlen(nm) + 1;
899 mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP);
900 (void) snprintf(mountpoint, mountpoint_len, "%s/.zfs/snapshot/%s",
901 refstr_value(dvp->v_vfsp->vfs_mntpt), nm);
902
903 margs.spec = snapname;
904 margs.dir = mountpoint;
905 margs.flags = MS_SYSSPACE | MS_NOMNTTAB;
906 margs.fstype = "zfs";
907 margs.dataptr = NULL;
908 margs.datalen = 0;
909 margs.optptr = NULL;
910 margs.optlen = 0;
911
912 err = domount("zfs", &margs, *vpp, kcred, &vfsp);
913 kmem_free(mountpoint, mountpoint_len);
914
915 if (err == 0) {
916 /*
917 * Return the mounted root rather than the covered mount point.
918 * Takes the GFS vnode at .zfs/snapshot/<snapname> and returns
919 * the ZFS vnode mounted on top of the GFS node. This ZFS
920 * vnode is the root of the newly created vfsp.
921 */
922 VFS_RELE(vfsp);
923 err = traverse(vpp);
924 }
925
926 if (err == 0) {
927 /*
928 * Fix up the root vnode mounted on .zfs/snapshot/<snapname>.
929 *
930 * This is where we lie about our v_vfsp in order to
931 * make .zfs/snapshot/<snapname> accessible over NFS
932 * without requiring manual mounts of <snapname>.
933 */
934 ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs);
935 VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs;
936 (*vpp)->v_vfsp = zfsvfs->z_vfs;
937 (*vpp)->v_flag &= ~VROOT;
938 }
939 mutex_exit(&sdp->sd_lock);
940 ZFS_EXIT(zfsvfs);
941
942 /*
943 * If we had an error, drop our hold on the vnode and
944 * zfsctl_snapshot_inactive() will clean up.
945 */
946 if (err != 0) {
947 VN_RELE(*vpp);
948 *vpp = NULL;
949 }
950 return (err);
951 }
952
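/*
 * Lookup in '.zfs/shares' handles '.' and '..' via GFS and delegates
 * everything else to the underlying shares directory znode.
 */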
953 /* ARGSUSED */
954 static int
955 zfsctl_shares_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
956 int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
957 int *direntflags, pathname_t *realpnp)
958 {
959 zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
960 znode_t *dzp;
961 int error;
962
963 ZFS_ENTER(zfsvfs);
964
965 if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
966 ZFS_EXIT(zfsvfs);
967 return (0);
968 }
969
970 if (zfsvfs->z_shares_dir == 0) {
971 ZFS_EXIT(zfsvfs);
972 return (SET_ERROR(ENOTSUP));
973 }
974 if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
975 error = VOP_LOOKUP(ZTOV(dzp), nm, vpp, pnp,
976 flags, rdir, cr, ct, direntflags, realpnp);
977 VN_RELE(ZTOV(dzp));
978 }
979
980 ZFS_EXIT(zfsvfs);
981
982 return (error);
983 }
984
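/*
 * Readdir callback for '.zfs/snapshot'. Returns the next snapshot of the
 * head objset, skipping autosnap-internal names, and uses the snapshot's
 * objset id as its inode number.
 */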
985 /* ARGSUSED */
986 static int
987 zfsctl_snapdir_readdir_cb(vnode_t *vp, void *dp, int *eofp,
988 offset_t *offp, offset_t *nextp, void *data, int flags)
989 {
990 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
991 char snapname[ZFS_MAX_DATASET_NAME_LEN];
992 uint64_t id, cookie;
993 boolean_t case_conflict;
994 int error;
995
996 ZFS_ENTER(zfsvfs);
997
998 cookie = *offp;
999 dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
1000 do {
1001 error = dmu_snapshot_list_next(zfsvfs->z_os,
1002 sizeof (snapname), snapname, &id, &cookie, &case_conflict);
1003 } while (error == 0 && autosnap_check_name(snapname));
1004 dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
1005 if (error) {
1006 ZFS_EXIT(zfsvfs);
1007 if (error == ENOENT) {
1008 *eofp = 1;
1009 return (0);
1010 }
1011 return (error);
1012 }
1013
1014 if (flags & V_RDDIR_ENTFLAGS) {
1015 edirent_t *eodp = dp;
1016
1017 (void) strcpy(eodp->ed_name, snapname);
1018 eodp->ed_ino = ZFSCTL_INO_SNAP(id);
1019 eodp->ed_eflags = case_conflict ? ED_CASE_CONFLICT : 0;
1020 } else {
1021 struct dirent64 *odp = dp;
1022
1023 (void) strcpy(odp->d_name, snapname);
1024 odp->d_ino = ZFSCTL_INO_SNAP(id);
1025 }
1026 *nextp = cookie;
1027
1028 ZFS_EXIT(zfsvfs);
1029
1030 return (0);
1031 }
1032
1033 /* ARGSUSED */
1034 static int
1035 zfsctl_shares_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
1036 caller_context_t *ct, int flags)
1037 {
1038 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1039 znode_t *dzp;
1040 int error;
1041
1042 ZFS_ENTER(zfsvfs);
1043
1044 if (zfsvfs->z_shares_dir == 0) {
1045 ZFS_EXIT(zfsvfs);
1046 return (SET_ERROR(ENOTSUP));
1047 }
1048 if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
1049 error = VOP_READDIR(ZTOV(dzp), uiop, cr, eofp, ct, flags);
1050 VN_RELE(ZTOV(dzp));
1051 } else {
1052 *eofp = 1;
1053 error = SET_ERROR(ENOENT);
1054 }
1055
1056 ZFS_EXIT(zfsvfs);
1057 return (error);
1058 }
1059
1060 /*
1061 * pvp is the '.zfs' directory (zfsctl_node_t).
1062 *
1063 * Creates vp, which is '.zfs/snapshot' (zfsctl_snapdir_t).
1064 *
1065 * This function is the callback to create a GFS vnode for '.zfs/snapshot'
1066 * when a lookup is performed on .zfs for "snapshot".
1067 */
1068 vnode_t *
1069 zfsctl_mknode_snapdir(vnode_t *pvp)
1070 {
1071 vnode_t *vp;
1072 zfsctl_snapdir_t *sdp;
1073
1074 vp = gfs_dir_create(sizeof (zfsctl_snapdir_t), pvp,
1075 zfsctl_ops_snapdir, NULL, NULL, MAXNAMELEN,
1076 zfsctl_snapdir_readdir_cb, NULL);
1077 sdp = vp->v_data;
1078 sdp->sd_node.zc_id = ZFSCTL_INO_SNAPDIR;
1079 sdp->sd_node.zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime;
1080 mutex_init(&sdp->sd_lock, NULL, MUTEX_DEFAULT, NULL);
1081 avl_create(&sdp->sd_snaps, snapentry_compare,
1082 sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, se_node));
1083 return (vp);
1084 }
1085
1086 vnode_t *
1087 zfsctl_mknode_shares(vnode_t *pvp)
1088 {
1089 vnode_t *vp;
1090 zfsctl_node_t *sdp;
1091
1092 vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp,
1093 zfsctl_ops_shares, NULL, NULL, MAXNAMELEN,
1094 NULL, NULL);
1095 sdp = vp->v_data;
1096 sdp->zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime;
	return (vp);
}
1100
1101 /* ARGSUSED */
1102 static int
1103 zfsctl_shares_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
1104 caller_context_t *ct)
1105 {
1106 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1107 znode_t *dzp;
1108 int error;
1109
1110 ZFS_ENTER(zfsvfs);
1111 if (zfsvfs->z_shares_dir == 0) {
1112 ZFS_EXIT(zfsvfs);
1113 return (SET_ERROR(ENOTSUP));
1114 }
1115 if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
1116 error = VOP_GETATTR(ZTOV(dzp), vap, flags, cr, ct);
1117 VN_RELE(ZTOV(dzp));
1118 }
	ZFS_EXIT(zfsvfs);
	return (error);
}
1124
1125 /* ARGSUSED */
1126 static int
1127 zfsctl_snapdir_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
1128 caller_context_t *ct)
1129 {
1130 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1131 zfsctl_snapdir_t *sdp = vp->v_data;
1132
1133 ZFS_ENTER(zfsvfs);
1134 zfsctl_common_getattr(vp, vap);
1135 vap->va_nodeid = gfs_file_inode(vp);
1136 vap->va_nlink = vap->va_size = avl_numnodes(&sdp->sd_snaps) + 2;
1137 vap->va_ctime = vap->va_mtime = dmu_objset_snap_cmtime(zfsvfs->z_os);
1138 ZFS_EXIT(zfsvfs);
1139
1140 return (0);
1141 }
1142
1143 /* ARGSUSED */
1144 static void
1145 zfsctl_snapdir_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
1146 {
1147 zfsctl_snapdir_t *sdp = vp->v_data;
1148 void *private;
1149
1150 private = gfs_dir_inactive(vp);
1151 if (private != NULL) {
1152 ASSERT(avl_numnodes(&sdp->sd_snaps) == 0);
1153 mutex_destroy(&sdp->sd_lock);
1154 avl_destroy(&sdp->sd_snaps);
1155 kmem_free(private, sizeof (zfsctl_snapdir_t));
1156 }
1157 }
1158
1159 static const fs_operation_def_t zfsctl_tops_snapdir[] = {
1160 { VOPNAME_OPEN, { .vop_open = zfsctl_common_open } },
1161 { VOPNAME_CLOSE, { .vop_close = zfsctl_common_close } },
1162 { VOPNAME_IOCTL, { .error = fs_inval } },
1163 { VOPNAME_GETATTR, { .vop_getattr = zfsctl_snapdir_getattr } },
1164 { VOPNAME_ACCESS, { .vop_access = zfsctl_common_access } },
1165 { VOPNAME_RENAME, { .vop_rename = zfsctl_snapdir_rename } },
1166 { VOPNAME_RMDIR, { .vop_rmdir = zfsctl_snapdir_remove } },
1167 { VOPNAME_MKDIR, { .vop_mkdir = zfsctl_snapdir_mkdir } },
1168 { VOPNAME_READDIR, { .vop_readdir = gfs_vop_readdir } },
1169 { VOPNAME_LOOKUP, { .vop_lookup = zfsctl_snapdir_lookup } },
1170 { VOPNAME_SEEK, { .vop_seek = fs_seek } },
1171 { VOPNAME_INACTIVE, { .vop_inactive = zfsctl_snapdir_inactive } },
1172 { VOPNAME_FID, { .vop_fid = zfsctl_common_fid } },
1173 { NULL }
1174 };
1175
1176 static const fs_operation_def_t zfsctl_tops_shares[] = {
1177 { VOPNAME_OPEN, { .vop_open = zfsctl_common_open } },
1178 { VOPNAME_CLOSE, { .vop_close = zfsctl_common_close } },
1179 { VOPNAME_IOCTL, { .error = fs_inval } },
1180 { VOPNAME_GETATTR, { .vop_getattr = zfsctl_shares_getattr } },
1181 { VOPNAME_ACCESS, { .vop_access = zfsctl_common_access } },
1182 { VOPNAME_READDIR, { .vop_readdir = zfsctl_shares_readdir } },
1183 { VOPNAME_LOOKUP, { .vop_lookup = zfsctl_shares_lookup } },
1184 { VOPNAME_SEEK, { .vop_seek = fs_seek } },
1185 { VOPNAME_INACTIVE, { .vop_inactive = gfs_vop_inactive } },
1186 { VOPNAME_FID, { .vop_fid = zfsctl_shares_fid } },
1187 { NULL }
1188 };
1189
1190 /*
1191 * pvp is the GFS vnode '.zfs/snapshot'.
1192 *
1193 * This creates a GFS node under '.zfs/snapshot' representing each
1194 * snapshot. This newly created GFS node is what we mount snapshot
 * vfs_t's on top of.
1196 */
1197 static vnode_t *
1198 zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset)
1199 {
1200 vnode_t *vp;
1201 zfsctl_node_t *zcp;
1202
1203 vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp,
1204 zfsctl_ops_snapshot, NULL, NULL, MAXNAMELEN, NULL, NULL);
1205 zcp = vp->v_data;
1206 zcp->zc_id = objset;
1207
1208 return (vp);
1209 }
1210
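/*
 * Inactive callback for a snapshot mountpoint vnode. On the last reference,
 * remove the matching entry from the parent snapdir's sd_snaps tree and
 * dispose of the vnode; otherwise just drop the hold.
 */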
1211 static void
1212 zfsctl_snapshot_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
1213 {
1214 zfsctl_snapdir_t *sdp;
1215 zfs_snapentry_t *sep, *next;
1216 vnode_t *dvp;
1217
1218 VERIFY(gfs_dir_lookup(vp, "..", &dvp, cr, 0, NULL, NULL) == 0);
1219 sdp = dvp->v_data;
1220
1221 mutex_enter(&sdp->sd_lock);
1222
1223 mutex_enter(&vp->v_lock);
1224 if (vp->v_count > 1) {
1225 VN_RELE_LOCKED(vp);
1226 mutex_exit(&vp->v_lock);
1227 mutex_exit(&sdp->sd_lock);
1228 VN_RELE(dvp);
1229 return;
1230 }
1231 mutex_exit(&vp->v_lock);
1232 ASSERT(!vn_ismntpt(vp));
1233
1234 sep = avl_first(&sdp->sd_snaps);
1235 while (sep != NULL) {
1236 next = AVL_NEXT(&sdp->sd_snaps, sep);
1237
1238 if (sep->se_root == vp) {
1239 avl_remove(&sdp->sd_snaps, sep);
1240 kmem_free(sep->se_name, strlen(sep->se_name) + 1);
1241 kmem_free(sep, sizeof (zfs_snapentry_t));
1242 break;
1243 }
1244 sep = next;
1245 }
1246 ASSERT(sep != NULL);
1247
1248 mutex_exit(&sdp->sd_lock);
1249 VN_RELE(dvp);
1250
1251 /*
1252 * Dispose of the vnode for the snapshot mount point.
1253 * This is safe to do because once this entry has been removed
1254 * from the AVL tree, it can't be found again, so cannot become
1255 * "active". If we lookup the same name again we will end up
1256 * creating a new vnode.
1257 */
1258 gfs_vop_inactive(vp, cr, ct);
1259 }
1260
1261
1262 /*
1263 * These VP's should never see the light of day. They should always
1264 * be covered.
1265 */
static const fs_operation_def_t zfsctl_tops_snapshot[] = {
	{ VOPNAME_INACTIVE,	{ .vop_inactive = zfsctl_snapshot_inactive } },
	{ NULL }
};
1270
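/*
 * Given an objset id, find the snapshot currently mounted under
 * '.zfs/snapshot' with that id and return its zfsvfs_t. This lets callers
 * (e.g., the file-handle-to-vnode path) reach a snapshot's filesystem
 * without knowing its name.
 */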
1271 int
1272 zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp)
1273 {
1274 zfsvfs_t *zfsvfs = vfsp->vfs_data;
1275 vnode_t *dvp, *vp;
1276 zfsctl_snapdir_t *sdp;
1277 zfsctl_node_t *zcp;
1278 zfs_snapentry_t *sep;
1279 int error;
1280
1281 ASSERT(zfsvfs->z_ctldir != NULL);
1282 error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
1283 NULL, 0, NULL, kcred, NULL, NULL, NULL);
1284 if (error != 0)
1285 return (error);
1286 sdp = dvp->v_data;
1287
1288 mutex_enter(&sdp->sd_lock);
1289 sep = avl_first(&sdp->sd_snaps);
1290 while (sep != NULL) {
1291 vp = sep->se_root;
1292 zcp = vp->v_data;
1293 if (zcp->zc_id == objsetid)
1294 break;
1295
1296 sep = AVL_NEXT(&sdp->sd_snaps, sep);
1297 }
1298
1299 if (sep != NULL) {
1300 VN_HOLD(vp);
1301 /*
1302 * Return the mounted root rather than the covered mount point.
1303 * Takes the GFS vnode at .zfs/snapshot/<snapshot objsetid>
1304 * and returns the ZFS vnode mounted on top of the GFS node.
1305 * This ZFS vnode is the root of the vfs for objset 'objsetid'.
1306 */
1307 error = traverse(&vp);
1308 if (error == 0) {
1309 if (vp == sep->se_root)
1310 error = SET_ERROR(EINVAL);
1311 else
1312 *zfsvfsp = VTOZ(vp)->z_zfsvfs;
1313 }
1314 mutex_exit(&sdp->sd_lock);
1315 VN_RELE(vp);
1316 } else {
1317 error = SET_ERROR(EINVAL);
1318 mutex_exit(&sdp->sd_lock);
1319 }
1320
1321 VN_RELE(dvp);
1322
1323 return (error);
1324 }
1325
1326 /*
1327 * Unmount any snapshots for the given filesystem. This is called from
1328 * zfs_umount() - if we have a ctldir, then go through and unmount all the
1329 * snapshots.
1330 */
1331 int
1332 zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)
1333 {
1334 zfsvfs_t *zfsvfs = vfsp->vfs_data;
1335 vnode_t *dvp;
1336 zfsctl_snapdir_t *sdp;
1337 zfs_snapentry_t *sep, *next;
1338 int error;
1339
1340 ASSERT(zfsvfs->z_ctldir != NULL);
1341 error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
1342 NULL, 0, NULL, cr, NULL, NULL, NULL);
1343 if (error != 0)
1344 return (error);
1345 sdp = dvp->v_data;
1346
1347 mutex_enter(&sdp->sd_lock);
1348
1349 sep = avl_first(&sdp->sd_snaps);
1350 while (sep != NULL) {
1351 next = AVL_NEXT(&sdp->sd_snaps, sep);
1352
1353 /*
1354 * If this snapshot is not mounted, then it must
1355 * have just been unmounted by somebody else, and
		 * will be cleaned up by zfsctl_snapshot_inactive().
1357 */
1358 if (vn_ismntpt(sep->se_root)) {
1359 avl_remove(&sdp->sd_snaps, sep);
1360 error = zfsctl_unmount_snap(sep, fflags, cr);
1361 if (error) {
1362 avl_add(&sdp->sd_snaps, sep);
1363 break;
1364 }
1365 }
1366 sep = next;
1367 }
1368
1369 mutex_exit(&sdp->sd_lock);
1370 VN_RELE(dvp);
1371
1372 return (error);
1373 }