1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Portions Copyright 2011 Martin Matuska
25 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
26 * Copyright (c) 2012 by Delphix. All rights reserved.
27 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
28 */
29
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/errno.h>
33 #include <sys/uio.h>
34 #include <sys/buf.h>
35 #include <sys/modctl.h>
36 #include <sys/open.h>
37 #include <sys/file.h>
38 #include <sys/kmem.h>
39 #include <sys/conf.h>
40 #include <sys/cmn_err.h>
41 #include <sys/stat.h>
42 #include <sys/zfs_ioctl.h>
43 #include <sys/zfs_vfsops.h>
44 #include <sys/zfs_znode.h>
45 #include <sys/zap.h>
46 #include <sys/spa.h>
47 #include <sys/spa_impl.h>
48 #include <sys/vdev.h>
49 #include <sys/priv_impl.h>
50 #include <sys/dmu.h>
51 #include <sys/dsl_dir.h>
52 #include <sys/dsl_dataset.h>
53 #include <sys/dsl_prop.h>
54 #include <sys/dsl_deleg.h>
55 #include <sys/dmu_objset.h>
56 #include <sys/dmu_impl.h>
57 #include <sys/ddi.h>
58 #include <sys/sunddi.h>
59 #include <sys/sunldi.h>
60 #include <sys/policy.h>
61 #include <sys/zone.h>
62 #include <sys/nvpair.h>
63 #include <sys/pathname.h>
64 #include <sys/mount.h>
65 #include <sys/sdt.h>
66 #include <sys/fs/zfs.h>
67 #include <sys/zfs_ctldir.h>
68 #include <sys/zfs_dir.h>
69 #include <sys/zfs_onexit.h>
70 #include <sys/zvol.h>
71 #include <sys/dsl_scan.h>
72 #include <sharefs/share.h>
73 #include <sys/dmu_objset.h>
74
75 #include "zfs_namecheck.h"
76 #include "zfs_prop.h"
77 #include "zfs_deleg.h"
78 #include "zfs_comutil.h"
79
/* Filesystem module linkage structure; defined outside this file. */
extern struct modlfs zfs_modlfs;

/* Setup and teardown entry points; defined outside this file. */
extern void zfs_init(void);
extern void zfs_fini(void);

/* LDI identity handle; starts out NULL. */
ldi_ident_t zfs_li = NULL;
/* Device info node pointer (presumably recorded at attach -- TODO confirm). */
dev_info_t *zfs_dip;

/* Signature of an ioctl handler: takes the command block, returns an errno. */
typedef int zfs_ioc_func_t(zfs_cmd_t *);
/* Signature of a security policy check run against the caller's credentials. */
typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *);
90
/*
 * Kind of name an ioctl carries (presumably used to pick the validation
 * applied to zc_name -- confirm against the dispatch code).
 */
typedef enum {
	NO_NAME,
	POOL_NAME,
	DATASET_NAME
} zfs_ioc_namecheck_t;

/*
 * Pool state checks an ioctl may request.  The values are distinct bits,
 * so they can be OR-ed together.
 */
typedef enum {
	POOL_CHECK_NONE		= 1 << 0,
	POOL_CHECK_SUSPENDED	= 1 << 1,
	POOL_CHECK_READONLY	= 1 << 2
} zfs_ioc_poolcheck_t;

/*
 * Per-ioctl descriptor: the handler, its security policy, and the checks
 * associated with the ioctl.
 */
typedef struct zfs_ioc_vec {
	zfs_ioc_func_t		*zvec_func;		/* ioctl handler */
	zfs_secpolicy_func_t	*zvec_secpolicy;	/* permission check */
	zfs_ioc_namecheck_t	zvec_namecheck;		/* kind of name carried */
	boolean_t		zvec_his_log;		/* history logging flag */
	zfs_ioc_poolcheck_t	zvec_pool_check;	/* pool state checks */
} zfs_ioc_vec_t;
110
/*
 * Delegated permission names for the user/group space properties.
 * This array is indexed by zfs_userquota_prop_t; the userspace policy
 * functions index it with zc_objset_type after checking the value against
 * ZFS_NUM_USERQUOTA_PROPS.
 */
static const char *userquota_perms[] = {
	ZFS_DELEG_PERM_USERUSED,
	ZFS_DELEG_PERM_USERQUOTA,
	ZFS_DELEG_PERM_GROUPUSED,
	ZFS_DELEG_PERM_GROUPQUOTA,
};
118
/*
 * Forward declarations for routines that are referenced before (or outside
 * of) their definitions.
 */
static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
static int zfs_check_settable(const char *name, nvpair_t *property,
    cred_t *cr);
static int zfs_check_clearable(char *dataset, nvlist_t *props,
    nvlist_t **errors);
static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
    boolean_t *);
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **);
127
128 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
129 void
130 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
131 {
132 const char *newfile;
133 char buf[512];
134 va_list adx;
135
136 /*
137 * Get rid of annoying "../common/" prefix to filename.
138 */
139 newfile = strrchr(file, '/');
140 if (newfile != NULL) {
141 newfile = newfile + 1; /* Get rid of leading / */
142 } else {
143 newfile = file;
144 }
145
146 va_start(adx, fmt);
147 (void) vsnprintf(buf, sizeof (buf), fmt, adx);
148 va_end(adx);
149
150 /*
151 * To get this data, use the zfs-dprintf probe as so:
152 * dtrace -q -n 'zfs-dprintf \
153 * /stringof(arg0) == "dbuf.c"/ \
154 * {printf("%s: %s", stringof(arg1), stringof(arg3))}'
155 * arg0 = file name
156 * arg1 = function name
157 * arg2 = line number
158 * arg3 = message
159 */
160 DTRACE_PROBE4(zfs__dprintf,
161 char *, newfile, char *, func, int, line, char *, buf);
162 }
163
/*
 * Release a history record buffer.  The buffer is freed as exactly
 * HIS_MAX_RECORD_LEN bytes, which is the size history_str_get() allocates.
 */
static void
history_str_free(char *buf)
{
	kmem_free(buf, HIS_MAX_RECORD_LEN);
}
169
170 static char *
171 history_str_get(zfs_cmd_t *zc)
172 {
173 char *buf;
174
175 if (zc->zc_history == NULL)
176 return (NULL);
177
178 buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
179 if (copyinstr((void *)(uintptr_t)zc->zc_history,
180 buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
181 history_str_free(buf);
182 return (NULL);
183 }
184
185 buf[HIS_MAX_RECORD_LEN -1] = '\0';
186
187 return (buf);
188 }
189
190 /*
191 * Check to see if the named dataset is currently defined as bootable
192 */
193 static boolean_t
194 zfs_is_bootfs(const char *name)
195 {
196 objset_t *os;
197
198 if (dmu_objset_hold(name, FTAG, &os) == 0) {
199 boolean_t ret;
200 ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
201 dmu_objset_rele(os, FTAG);
202 return (ret);
203 }
204 return (B_FALSE);
205 }
206
207 /*
208 * zfs_earlier_version
209 *
210 * Return non-zero if the spa version is less than requested version.
211 */
212 static int
213 zfs_earlier_version(const char *name, int version)
214 {
215 spa_t *spa;
216
217 if (spa_open(name, &spa, FTAG) == 0) {
218 if (spa_version(spa) < version) {
219 spa_close(spa, FTAG);
220 return (1);
221 }
222 spa_close(spa, FTAG);
223 }
224 return (0);
225 }
226
227 /*
228 * zpl_earlier_version
229 *
230 * Return TRUE if the ZPL version is less than requested version.
231 */
232 static boolean_t
233 zpl_earlier_version(const char *name, int version)
234 {
235 objset_t *os;
236 boolean_t rc = B_TRUE;
237
238 if (dmu_objset_hold(name, FTAG, &os) == 0) {
239 uint64_t zplversion;
240
241 if (dmu_objset_type(os) != DMU_OST_ZFS) {
242 dmu_objset_rele(os, FTAG);
243 return (B_TRUE);
244 }
245 /* XXX reading from non-owned objset */
246 if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
247 rc = zplversion < version;
248 dmu_objset_rele(os, FTAG);
249 }
250 return (rc);
251 }
252
253 static void
254 zfs_log_history(zfs_cmd_t *zc)
255 {
256 spa_t *spa;
257 char *buf;
258
259 if ((buf = history_str_get(zc)) == NULL)
260 return;
261
262 if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
263 if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
264 (void) spa_history_log(spa, buf, LOG_CMD_NORMAL);
265 spa_close(spa, FTAG);
266 }
267 history_str_free(buf);
268 }
269
/*
 * Policy for top-level read operations (list pools).  Requires no privileges,
 * and can be used in the local zone, as there is no associated dataset.
 *
 * Both arguments are ignored; the check always succeeds and returns 0.
 */
/* ARGSUSED */
static int
zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr)
{
	return (0);
}
280
281 /*
282 * Policy for dataset read operations (list children, get statistics). Requires
283 * no privileges, but must be visible in the local zone.
284 */
285 /* ARGSUSED */
286 static int
287 zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr)
288 {
289 if (INGLOBALZONE(curproc) ||
290 zone_dataset_visible(zc->zc_name, NULL))
291 return (0);
292
293 return (ENOENT);
294 }
295
296 static int
297 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
298 {
299 int writable = 1;
300
301 /*
302 * The dataset must be visible by this zone -- check this first
303 * so they don't see EPERM on something they shouldn't know about.
304 */
305 if (!INGLOBALZONE(curproc) &&
306 !zone_dataset_visible(dataset, &writable))
307 return (ENOENT);
308
309 if (INGLOBALZONE(curproc)) {
310 /*
311 * If the fs is zoned, only root can access it from the
312 * global zone.
313 */
314 if (secpolicy_zfs(cr) && zoned)
315 return (EPERM);
316 } else {
317 /*
318 * If we are in a local zone, the 'zoned' property must be set.
319 */
320 if (!zoned)
321 return (EPERM);
322
323 /* must be writable by this zone */
324 if (!writable)
325 return (EPERM);
326 }
327 return (0);
328 }
329
330 static int
331 zfs_dozonecheck(const char *dataset, cred_t *cr)
332 {
333 uint64_t zoned;
334
335 if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
336 return (ENOENT);
337
338 return (zfs_dozonecheck_impl(dataset, zoned, cr));
339 }
340
341 static int
342 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
343 {
344 uint64_t zoned;
345
346 rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
347 if (dsl_prop_get_ds(ds, "zoned", 8, 1, &zoned, NULL)) {
348 rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
349 return (ENOENT);
350 }
351 rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
352
353 return (zfs_dozonecheck_impl(dataset, zoned, cr));
354 }
355
356 /*
357 * If name ends in a '@', then require recursive permissions.
358 */
359 int
360 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
361 {
362 int error;
363 boolean_t descendent = B_FALSE;
364 dsl_dataset_t *ds;
365 char *at;
366
367 at = strchr(name, '@');
368 if (at != NULL && at[1] == '\0') {
369 *at = '\0';
370 descendent = B_TRUE;
371 }
372
373 error = dsl_dataset_hold(name, FTAG, &ds);
374 if (at != NULL)
375 *at = '@';
376 if (error != 0)
377 return (error);
378
379 error = zfs_dozonecheck_ds(name, ds, cr);
380 if (error == 0) {
381 error = secpolicy_zfs(cr);
382 if (error)
383 error = dsl_deleg_access_impl(ds, descendent, perm, cr);
384 }
385
386 dsl_dataset_rele(ds, FTAG);
387 return (error);
388 }
389
390 int
391 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
392 const char *perm, cred_t *cr)
393 {
394 int error;
395
396 error = zfs_dozonecheck_ds(name, ds, cr);
397 if (error == 0) {
398 error = secpolicy_zfs(cr);
399 if (error)
400 error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr);
401 }
402 return (error);
403 }
404
405 /*
406 * Policy for setting the security label property.
407 *
408 * Returns 0 for success, non-zero for access and other errors.
409 */
410 static int
411 zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
412 {
413 char ds_hexsl[MAXNAMELEN];
414 bslabel_t ds_sl, new_sl;
415 boolean_t new_default = FALSE;
416 uint64_t zoned;
417 int needed_priv = -1;
418 int error;
419
420 /* First get the existing dataset label. */
421 error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
422 1, sizeof (ds_hexsl), &ds_hexsl, NULL);
423 if (error)
424 return (EPERM);
425
426 if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
427 new_default = TRUE;
428
429 /* The label must be translatable */
430 if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
431 return (EINVAL);
432
433 /*
434 * In a non-global zone, disallow attempts to set a label that
435 * doesn't match that of the zone; otherwise no other checks
436 * are needed.
437 */
438 if (!INGLOBALZONE(curproc)) {
439 if (new_default || !blequal(&new_sl, CR_SL(CRED())))
440 return (EPERM);
441 return (0);
442 }
443
444 /*
445 * For global-zone datasets (i.e., those whose zoned property is
446 * "off", verify that the specified new label is valid for the
447 * global zone.
448 */
449 if (dsl_prop_get_integer(name,
450 zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
451 return (EPERM);
452 if (!zoned) {
453 if (zfs_check_global_label(name, strval) != 0)
454 return (EPERM);
455 }
456
457 /*
458 * If the existing dataset label is nondefault, check if the
459 * dataset is mounted (label cannot be changed while mounted).
460 * Get the zfsvfs; if there isn't one, then the dataset isn't
461 * mounted (or isn't a dataset, doesn't exist, ...).
462 */
463 if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
464 objset_t *os;
465 static char *setsl_tag = "setsl_tag";
466
467 /*
468 * Try to own the dataset; abort if there is any error,
469 * (e.g., already mounted, in use, or other error).
470 */
471 error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
472 setsl_tag, &os);
473 if (error)
474 return (EPERM);
475
476 dmu_objset_disown(os, setsl_tag);
477
478 if (new_default) {
479 needed_priv = PRIV_FILE_DOWNGRADE_SL;
480 goto out_check;
481 }
482
483 if (hexstr_to_label(strval, &new_sl) != 0)
484 return (EPERM);
485
486 if (blstrictdom(&ds_sl, &new_sl))
487 needed_priv = PRIV_FILE_DOWNGRADE_SL;
488 else if (blstrictdom(&new_sl, &ds_sl))
489 needed_priv = PRIV_FILE_UPGRADE_SL;
490 } else {
491 /* dataset currently has a default label */
492 if (!new_default)
493 needed_priv = PRIV_FILE_UPGRADE_SL;
494 }
495
496 out_check:
497 if (needed_priv != -1)
498 return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
499 return (0);
500 }
501
502 static int
503 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
504 cred_t *cr)
505 {
506 char *strval;
507
508 /*
509 * Check permissions for special properties.
510 */
511 switch (prop) {
512 case ZFS_PROP_ZONED:
513 /*
514 * Disallow setting of 'zoned' from within a local zone.
515 */
516 if (!INGLOBALZONE(curproc))
517 return (EPERM);
518 break;
519
520 case ZFS_PROP_QUOTA:
521 if (!INGLOBALZONE(curproc)) {
522 uint64_t zoned;
523 char setpoint[MAXNAMELEN];
524 /*
525 * Unprivileged users are allowed to modify the
526 * quota on things *under* (ie. contained by)
527 * the thing they own.
528 */
529 if (dsl_prop_get_integer(dsname, "zoned", &zoned,
530 setpoint))
531 return (EPERM);
532 if (!zoned || strlen(dsname) <= strlen(setpoint))
533 return (EPERM);
534 }
535 break;
536
537 case ZFS_PROP_MLSLABEL:
538 if (!is_system_labeled())
539 return (EPERM);
540
541 if (nvpair_value_string(propval, &strval) == 0) {
542 int err;
543
544 err = zfs_set_slabel_policy(dsname, strval, CRED());
545 if (err != 0)
546 return (err);
547 }
548 break;
549 }
550
551 return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
552 }
553
554 int
555 zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr)
556 {
557 int error;
558
559 error = zfs_dozonecheck(zc->zc_name, cr);
560 if (error)
561 return (error);
562
563 /*
564 * permission to set permissions will be evaluated later in
565 * dsl_deleg_can_allow()
566 */
567 return (0);
568 }
569
/*
 * Policy for rollback: the caller needs write permission for
 * ZFS_DELEG_PERM_ROLLBACK on the dataset named in zc_name.
 */
int
zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr)
{
	return (zfs_secpolicy_write_perms(zc->zc_name,
	    ZFS_DELEG_PERM_ROLLBACK, cr));
}
576
577 int
578 zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr)
579 {
580 spa_t *spa;
581 dsl_pool_t *dp;
582 dsl_dataset_t *ds;
583 char *cp;
584 int error;
585
586 /*
587 * Generate the current snapshot name from the given objsetid, then
588 * use that name for the secpolicy/zone checks.
589 */
590 cp = strchr(zc->zc_name, '@');
591 if (cp == NULL)
592 return (EINVAL);
593 error = spa_open(zc->zc_name, &spa, FTAG);
594 if (error)
595 return (error);
596
597 dp = spa_get_dsl(spa);
598 rw_enter(&dp->dp_config_rwlock, RW_READER);
599 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
600 rw_exit(&dp->dp_config_rwlock);
601 spa_close(spa, FTAG);
602 if (error)
603 return (error);
604
605 dsl_dataset_name(ds, zc->zc_name);
606
607 error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
608 ZFS_DELEG_PERM_SEND, cr);
609 dsl_dataset_rele(ds, FTAG);
610
611 return (error);
612 }
613
614 static int
615 zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr)
616 {
617 vnode_t *vp;
618 int error;
619
620 if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
621 NO_FOLLOW, NULL, &vp)) != 0)
622 return (error);
623
624 /* Now make sure mntpnt and dataset are ZFS */
625
626 if (vp->v_vfsp->vfs_fstype != zfsfstype ||
627 (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
628 zc->zc_name) != 0)) {
629 VN_RELE(vp);
630 return (EPERM);
631 }
632
633 VN_RELE(vp);
634 return (dsl_deleg_access(zc->zc_name,
635 ZFS_DELEG_PERM_SHARE, cr));
636 }
637
638 int
639 zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr)
640 {
641 if (!INGLOBALZONE(curproc))
642 return (EPERM);
643
644 if (secpolicy_nfs(cr) == 0) {
645 return (0);
646 } else {
647 return (zfs_secpolicy_deleg_share(zc, cr));
648 }
649 }
650
651 int
652 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr)
653 {
654 if (!INGLOBALZONE(curproc))
655 return (EPERM);
656
657 if (secpolicy_smb(cr) == 0) {
658 return (0);
659 } else {
660 return (zfs_secpolicy_deleg_share(zc, cr));
661 }
662 }
663
/*
 * Compute the parent of a dataset name.
 *
 *	datasetname	name to examine ("pool/fs", "pool/fs@snap", ...)
 *	parent		output buffer that receives the parent's name
 *	parentsize	size of 'parent' in bytes
 *
 * Remove the @bla or /bla from the end of the name to get the parent.
 *
 * Returns 0 on success, ENOENT if the name has neither an '@' nor a '/'
 * (in which case 'parent' holds an unmodified copy of the name), or
 * ENAMETOOLONG if the name, including its terminator, does not fit in
 * 'parent'.  A name that does not fit is rejected rather than truncated, so
 * the buffer is always NUL-terminated before it is searched and a permission
 * check is never made against a shortened name.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *sep;

	if (parentsize <= 0 || strlen(datasetname) >= (size_t)parentsize)
		return (ENAMETOOLONG);

	/* The name fits, so strncpy() also writes the terminating NUL. */
	(void) strncpy(parent, datasetname, parentsize);

	sep = strrchr(parent, '@');
	if (sep == NULL)
		sep = strrchr(parent, '/');
	if (sep == NULL)
		return (ENOENT);

	*sep = '\0';
	return (0);
}
685
686 int
687 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
688 {
689 int error;
690
691 if ((error = zfs_secpolicy_write_perms(name,
692 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
693 return (error);
694
695 return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
696 }
697
/*
 * Policy for destroy: applies zfs_secpolicy_destroy_perms() to the dataset
 * named in zc_name.
 */
static int
zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr)
{
	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
}
703
/*
 * Destroying snapshots with delegated permissions requires
 * descendent mount and destroy permissions.
 *
 * The check is made on "<zc_name>@": a name ending in '@' is what asks
 * zfs_secpolicy_write_perms() for recursive permissions.
 */
static int
zfs_secpolicy_destroy_recursive(zfs_cmd_t *zc, cred_t *cr)
{
	int error;
	char *dsname;

	/* Temporary "<name>@" string; released with strfree() below. */
	dsname = kmem_asprintf("%s@", zc->zc_name);

	error = zfs_secpolicy_destroy_perms(dsname, cr);

	strfree(dsname);
	return (error);
}
721
722 int
723 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
724 {
725 char parentname[MAXNAMELEN];
726 int error;
727
728 if ((error = zfs_secpolicy_write_perms(from,
729 ZFS_DELEG_PERM_RENAME, cr)) != 0)
730 return (error);
731
732 if ((error = zfs_secpolicy_write_perms(from,
733 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
734 return (error);
735
736 if ((error = zfs_get_parent(to, parentname,
737 sizeof (parentname))) != 0)
738 return (error);
739
740 if ((error = zfs_secpolicy_write_perms(parentname,
741 ZFS_DELEG_PERM_CREATE, cr)) != 0)
742 return (error);
743
744 if ((error = zfs_secpolicy_write_perms(parentname,
745 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
746 return (error);
747
748 return (error);
749 }
750
/*
 * Policy for rename: zc_name is the source and zc_value the destination
 * passed to zfs_secpolicy_rename_perms().
 */
static int
zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr)
{
	return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
}
756
757 static int
758 zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr)
759 {
760 char parentname[MAXNAMELEN];
761 objset_t *clone;
762 int error;
763
764 error = zfs_secpolicy_write_perms(zc->zc_name,
765 ZFS_DELEG_PERM_PROMOTE, cr);
766 if (error)
767 return (error);
768
769 error = dmu_objset_hold(zc->zc_name, FTAG, &clone);
770
771 if (error == 0) {
772 dsl_dataset_t *pclone = NULL;
773 dsl_dir_t *dd;
774 dd = clone->os_dsl_dataset->ds_dir;
775
776 rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
777 error = dsl_dataset_hold_obj(dd->dd_pool,
778 dd->dd_phys->dd_origin_obj, FTAG, &pclone);
779 rw_exit(&dd->dd_pool->dp_config_rwlock);
780 if (error) {
781 dmu_objset_rele(clone, FTAG);
782 return (error);
783 }
784
785 error = zfs_secpolicy_write_perms(zc->zc_name,
786 ZFS_DELEG_PERM_MOUNT, cr);
787
788 dsl_dataset_name(pclone, parentname);
789 dmu_objset_rele(clone, FTAG);
790 dsl_dataset_rele(pclone, FTAG);
791 if (error == 0)
792 error = zfs_secpolicy_write_perms(parentname,
793 ZFS_DELEG_PERM_PROMOTE, cr);
794 }
795 return (error);
796 }
797
798 static int
799 zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr)
800 {
801 int error;
802
803 if ((error = zfs_secpolicy_write_perms(zc->zc_name,
804 ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
805 return (error);
806
807 if ((error = zfs_secpolicy_write_perms(zc->zc_name,
808 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
809 return (error);
810
811 return (zfs_secpolicy_write_perms(zc->zc_name,
812 ZFS_DELEG_PERM_CREATE, cr));
813 }
814
/*
 * Taking a snapshot of 'name' requires write permission for
 * ZFS_DELEG_PERM_SNAPSHOT on it.
 */
int
zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
{
	return (zfs_secpolicy_write_perms(name,
	    ZFS_DELEG_PERM_SNAPSHOT, cr));
}
821
/*
 * Policy for snapshot: applies zfs_secpolicy_snapshot_perms() to the
 * dataset named in zc_name.
 */
static int
zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr)
{

	return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr));
}
828
829 static int
830 zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr)
831 {
832 char parentname[MAXNAMELEN];
833 int error;
834
835 if ((error = zfs_get_parent(zc->zc_name, parentname,
836 sizeof (parentname))) != 0)
837 return (error);
838
839 if (zc->zc_value[0] != '\0') {
840 if ((error = zfs_secpolicy_write_perms(zc->zc_value,
841 ZFS_DELEG_PERM_CLONE, cr)) != 0)
842 return (error);
843 }
844
845 if ((error = zfs_secpolicy_write_perms(parentname,
846 ZFS_DELEG_PERM_CREATE, cr)) != 0)
847 return (error);
848
849 error = zfs_secpolicy_write_perms(parentname,
850 ZFS_DELEG_PERM_MOUNT, cr);
851
852 return (error);
853 }
854
855 static int
856 zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr)
857 {
858 int error;
859
860 error = secpolicy_fs_unmount(cr, NULL);
861 if (error) {
862 error = dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr);
863 }
864 return (error);
865 }
866
867 /*
868 * Policy for pool operations - create/destroy pools, add vdevs, etc. Requires
869 * SYS_CONFIG privilege, which is not available in a local zone.
870 */
871 /* ARGSUSED */
872 static int
873 zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr)
874 {
875 if (secpolicy_sys_config(cr, B_FALSE) != 0)
876 return (EPERM);
877
878 return (0);
879 }
880
881 /*
882 * Policy for object to name lookups.
883 */
884 /* ARGSUSED */
885 static int
886 zfs_secpolicy_diff(zfs_cmd_t *zc, cred_t *cr)
887 {
888 int error;
889
890 if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
891 return (0);
892
893 error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
894 return (error);
895 }
896
/*
 * Policy for fault injection.  Requires all privileges.
 *
 * The decision is delegated entirely to secpolicy_zinject(); 'zc' is not
 * examined.
 */
/* ARGSUSED */
static int
zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr)
{
	return (secpolicy_zinject(cr));
}
906
907 static int
908 zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr)
909 {
910 zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
911
912 if (prop == ZPROP_INVAL) {
913 if (!zfs_prop_user(zc->zc_value))
914 return (EINVAL);
915 return (zfs_secpolicy_write_perms(zc->zc_name,
916 ZFS_DELEG_PERM_USERPROP, cr));
917 } else {
918 return (zfs_secpolicy_setprop(zc->zc_name, prop,
919 NULL, cr));
920 }
921 }
922
923 static int
924 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr)
925 {
926 int err = zfs_secpolicy_read(zc, cr);
927 if (err)
928 return (err);
929
930 if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
931 return (EINVAL);
932
933 if (zc->zc_value[0] == 0) {
934 /*
935 * They are asking about a posix uid/gid. If it's
936 * themself, allow it.
937 */
938 if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
939 zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
940 if (zc->zc_guid == crgetuid(cr))
941 return (0);
942 } else {
943 if (groupmember(zc->zc_guid, cr))
944 return (0);
945 }
946 }
947
948 return (zfs_secpolicy_write_perms(zc->zc_name,
949 userquota_perms[zc->zc_objset_type], cr));
950 }
951
952 static int
953 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr)
954 {
955 int err = zfs_secpolicy_read(zc, cr);
956 if (err)
957 return (err);
958
959 if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
960 return (EINVAL);
961
962 return (zfs_secpolicy_write_perms(zc->zc_name,
963 userquota_perms[zc->zc_objset_type], cr));
964 }
965
/*
 * Policy for the userspace upgrade: checked as if the caller were setting
 * ZFS_PROP_VERSION on zc_name (no property value is supplied).
 */
static int
zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr)
{
	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
	    NULL, cr));
}
972
/*
 * Policy for hold: the caller needs write permission for
 * ZFS_DELEG_PERM_HOLD on zc_name.
 */
static int
zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr)
{
	return (zfs_secpolicy_write_perms(zc->zc_name,
	    ZFS_DELEG_PERM_HOLD, cr));
}
979
/*
 * Policy for release: the caller needs write permission for
 * ZFS_DELEG_PERM_RELEASE on zc_name.
 */
static int
zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr)
{
	return (zfs_secpolicy_write_perms(zc->zc_name,
	    ZFS_DELEG_PERM_RELEASE, cr));
}
986
987 /*
988 * Policy for allowing temporary snapshots to be taken or released
989 */
990 static int
991 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, cred_t *cr)
992 {
993 /*
994 * A temporary snapshot is the same as a snapshot,
995 * hold, destroy and release all rolled into one.
996 * Delegated diff alone is sufficient that we allow this.
997 */
998 int error;
999
1000 if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1001 ZFS_DELEG_PERM_DIFF, cr)) == 0)
1002 return (0);
1003
1004 error = zfs_secpolicy_snapshot(zc, cr);
1005 if (!error)
1006 error = zfs_secpolicy_hold(zc, cr);
1007 if (!error)
1008 error = zfs_secpolicy_release(zc, cr);
1009 if (!error)
1010 error = zfs_secpolicy_destroy(zc, cr);
1011 return (error);
1012 }
1013
1014 /*
1015 * Returns the nvlist as specified by the user in the zfs_cmd_t.
1016 */
1017 static int
1018 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1019 {
1020 char *packed;
1021 int error;
1022 nvlist_t *list = NULL;
1023
1024 /*
1025 * Read in and unpack the user-supplied nvlist.
1026 */
1027 if (size == 0)
1028 return (EINVAL);
1029
1030 packed = kmem_alloc(size, KM_SLEEP);
1031
1032 if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1033 iflag)) != 0) {
1034 kmem_free(packed, size);
1035 return (error);
1036 }
1037
1038 if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1039 kmem_free(packed, size);
1040 return (error);
1041 }
1042
1043 kmem_free(packed, size);
1044
1045 *nvp = list;
1046 return (0);
1047 }
1048
/*
 * Shrink the error list, if necessary, so that its packed (native encoding)
 * size fits in zc->zc_nvlist_dst_size.
 *
 * When the list is too large, pairs are removed one at a time and a
 * ZPROP_N_MORE_ERRORS int32 entry holding the number of removed pairs is
 * left in the list.
 *
 * Returns 0 on success, or ENOMEM when the list does not fit and the
 * destination is smaller than 1024 bytes (no trimming is attempted then).
 */
static int
fit_error_list(zfs_cmd_t *zc, nvlist_t **errors)
{
	size_t size;

	VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);

	if (size > zc->zc_nvlist_dst_size) {
		nvpair_t *more_errors;
		int n = 0;	/* number of pairs removed so far */

		if (zc->zc_nvlist_dst_size < 1024)
			return (ENOMEM);

		/*
		 * Add a placeholder counter first so that its own size is
		 * accounted for while trimming.  The lookup with a NULL
		 * position presumably yields the last pair, i.e. the
		 * placeholder just added -- TODO confirm.
		 */
		VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, 0) == 0);
		more_errors = nvlist_prev_nvpair(*errors, NULL);

		/* Drop the pair preceding the placeholder until the list fits. */
		do {
			nvpair_t *pair = nvlist_prev_nvpair(*errors,
			    more_errors);
			VERIFY(nvlist_remove_nvpair(*errors, pair) == 0);
			n++;
			VERIFY(nvlist_size(*errors, &size,
			    NV_ENCODE_NATIVE) == 0);
		} while (size > zc->zc_nvlist_dst_size);

		/* Replace the placeholder with the real count. */
		VERIFY(nvlist_remove_nvpair(*errors, more_errors) == 0);
		VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, n) == 0);
		ASSERT(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);
		ASSERT(size <= zc->zc_nvlist_dst_size);
	}

	return (0);
}
1083
1084 static int
1085 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1086 {
1087 char *packed = NULL;
1088 int error = 0;
1089 size_t size;
1090
1091 VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0);
1092
1093 if (size > zc->zc_nvlist_dst_size) {
1094 error = ENOMEM;
1095 } else {
1096 packed = kmem_alloc(size, KM_SLEEP);
1097 VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE,
1098 KM_SLEEP) == 0);
1099 if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1100 size, zc->zc_iflags) != 0)
1101 error = EFAULT;
1102 kmem_free(packed, size);
1103 }
1104
1105 zc->zc_nvlist_dst_size = size;
1106 return (error);
1107 }
1108
1109 static int
1110 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1111 {
1112 objset_t *os;
1113 int error;
1114
1115 error = dmu_objset_hold(dsname, FTAG, &os);
1116 if (error)
1117 return (error);
1118 if (dmu_objset_type(os) != DMU_OST_ZFS) {
1119 dmu_objset_rele(os, FTAG);
1120 return (EINVAL);
1121 }
1122
1123 mutex_enter(&os->os_user_ptr_lock);
1124 *zfvp = dmu_objset_get_user(os);
1125 if (*zfvp) {
1126 VFS_HOLD((*zfvp)->z_vfs);
1127 } else {
1128 error = ESRCH;
1129 }
1130 mutex_exit(&os->os_user_ptr_lock);
1131 dmu_objset_rele(os, FTAG);
1132 return (error);
1133 }
1134
/*
 * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
 * case its z_vfs will be NULL, and it will be opened as the owner.
 * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
 * which prevents all vnode ops from running.
 *
 * Returns 0 with *zfvp set and z_teardown_lock entered under 'tag' (undo
 * with zfsvfs_rele()), the error from zfsvfs_create(), or EBUSY if the
 * filesystem is marked z_unmounted.
 */
static int
zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
{
	int error = 0;

	/* Any getzfsvfs() failure falls back to creating a private zfsvfs. */
	if (getzfsvfs(name, zfvp) != 0)
		error = zfsvfs_create(name, zfvp);
	if (error == 0) {
		rrw_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
		    RW_READER, tag);
		if ((*zfvp)->z_unmounted) {
			/*
			 * XXX we could probably try again, since the unmounting
			 * thread should be just about to disassociate the
			 * objset from the zfsvfs.
			 *
			 * NOTE(review): only the teardown lock is dropped
			 * here; the hold taken by getzfsvfs() (VFS_HOLD)
			 * does not appear to be released on this path --
			 * confirm whether callers are expected to clean up.
			 */
			rrw_exit(&(*zfvp)->z_teardown_lock, tag);
			return (EBUSY);
		}
	}
	return (error);
}
1163
/*
 * Undo a successful zfsvfs_hold(): leave z_teardown_lock, then either drop
 * the VFS hold (when z_vfs is set) or disown the objset and free the
 * zfsvfs (when z_vfs is NULL).
 */
static void
zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
{
	rrw_exit(&zfsvfs->z_teardown_lock, tag);

	if (zfsvfs->z_vfs) {
		VFS_RELE(zfsvfs->z_vfs);
	} else {
		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
		zfsvfs_free(zfsvfs);
	}
}
1176
1177 static int
1178 zfs_ioc_pool_create(zfs_cmd_t *zc)
1179 {
1180 int error;
1181 nvlist_t *config, *props = NULL;
1182 nvlist_t *rootprops = NULL;
1183 nvlist_t *zplprops = NULL;
1184 char *buf;
1185
1186 if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1187 zc->zc_iflags, &config))
1188 return (error);
1189
1190 if (zc->zc_nvlist_src_size != 0 && (error =
1191 get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1192 zc->zc_iflags, &props))) {
1193 nvlist_free(config);
1194 return (error);
1195 }
1196
1197 if (props) {
1198 nvlist_t *nvl = NULL;
1199 uint64_t version = SPA_VERSION;
1200
1201 (void) nvlist_lookup_uint64(props,
1202 zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1203 if (!SPA_VERSION_IS_SUPPORTED(version)) {
1204 error = EINVAL;
1205 goto pool_props_bad;
1206 }
1207 (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1208 if (nvl) {
1209 error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1210 if (error != 0) {
1211 nvlist_free(config);
1212 nvlist_free(props);
1213 return (error);
1214 }
1215 (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1216 }
1217 VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1218 error = zfs_fill_zplprops_root(version, rootprops,
1219 zplprops, NULL);
1220 if (error)
1221 goto pool_props_bad;
1222 }
1223
1224 buf = history_str_get(zc);
1225
1226 error = spa_create(zc->zc_name, config, props, buf, zplprops);
1227
1228 /*
1229 * Set the remaining root properties
1230 */
1231 if (!error && (error = zfs_set_prop_nvlist(zc->zc_name,
1232 ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1233 (void) spa_destroy(zc->zc_name);
1234
1235 if (buf != NULL)
1236 history_str_free(buf);
1237
1238 pool_props_bad:
1239 nvlist_free(rootprops);
1240 nvlist_free(zplprops);
1241 nvlist_free(config);
1242 nvlist_free(props);
1243
1244 return (error);
1245 }
1246
/*
 * Destroy the pool named in zc_name.  The history entry is logged before
 * the destroy is attempted; the pool's zvol minors are removed only if the
 * destroy succeeds.
 */
static int
zfs_ioc_pool_destroy(zfs_cmd_t *zc)
{
	int error;
	zfs_log_history(zc);
	error = spa_destroy(zc->zc_name);
	if (error == 0)
		zvol_remove_minors(zc->zc_name);
	return (error);
}
1257
1258 static int
1259 zfs_ioc_pool_import(zfs_cmd_t *zc)
1260 {
1261 nvlist_t *config, *props = NULL;
1262 uint64_t guid;
1263 int error;
1264
1265 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1266 zc->zc_iflags, &config)) != 0)
1267 return (error);
1268
1269 if (zc->zc_nvlist_src_size != 0 && (error =
1270 get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1271 zc->zc_iflags, &props))) {
1272 nvlist_free(config);
1273 return (error);
1274 }
1275
1276 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1277 guid != zc->zc_guid)
1278 error = EINVAL;
1279 else
1280 error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1281
1282 if (zc->zc_nvlist_dst != 0) {
1283 int err;
1284
1285 if ((err = put_nvlist(zc, config)) != 0)
1286 error = err;
1287 }
1288
1289 nvlist_free(config);
1290
1291 if (props)
1292 nvlist_free(props);
1293
1294 return (error);
1295 }
1296
1297 static int
1298 zfs_ioc_pool_export(zfs_cmd_t *zc)
1299 {
1300 int error;
1301 boolean_t force = (boolean_t)zc->zc_cookie;
1302 boolean_t hardforce = (boolean_t)zc->zc_guid;
1303
1304 zfs_log_history(zc);
1305 error = spa_export(zc->zc_name, NULL, force, hardforce);
1306 if (error == 0)
1307 zvol_remove_minors(zc->zc_name);
1308 return (error);
1309 }
1310
1311 static int
1312 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1313 {
1314 nvlist_t *configs;
1315 int error;
1316
1317 if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1318 return (EEXIST);
1319
1320 error = put_nvlist(zc, configs);
1321
1322 nvlist_free(configs);
1323
1324 return (error);
1325 }
1326
1327 /*
1328 * inputs:
1329 * zc_name name of the pool
1330 *
1331 * outputs:
1332 * zc_cookie real errno
1333 * zc_nvlist_dst config nvlist
1334 * zc_nvlist_dst_size size of config nvlist
1335 */
1336 static int
1337 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1338 {
1339 nvlist_t *config;
1340 int error;
1341 int ret = 0;
1342
1343 error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1344 sizeof (zc->zc_value));
1345
1346 if (config != NULL) {
1347 ret = put_nvlist(zc, config);
1348 nvlist_free(config);
1349
1350 /*
1351 * The config may be present even if 'error' is non-zero.
1352 * In this case we return success, and preserve the real errno
1353 * in 'zc_cookie'.
1354 */
1355 zc->zc_cookie = error;
1356 } else {
1357 ret = error;
1358 }
1359
1360 return (ret);
1361 }
1362
1363 /*
1364 * Try to import the given pool, returning pool stats as appropriate so that
1365 * user land knows which devices are available and overall pool health.
1366 */
1367 static int
1368 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1369 {
1370 nvlist_t *tryconfig, *config;
1371 int error;
1372
1373 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1374 zc->zc_iflags, &tryconfig)) != 0)
1375 return (error);
1376
1377 config = spa_tryimport(tryconfig);
1378
1379 nvlist_free(tryconfig);
1380
1381 if (config == NULL)
1382 return (EINVAL);
1383
1384 error = put_nvlist(zc, config);
1385 nvlist_free(config);
1386
1387 return (error);
1388 }
1389
1390 /*
1391 * inputs:
1392 * zc_name name of the pool
1393 * zc_cookie scan func (pool_scan_func_t)
1394 */
1395 static int
1396 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1397 {
1398 spa_t *spa;
1399 int error;
1400
1401 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1402 return (error);
1403
1404 if (zc->zc_cookie == POOL_SCAN_NONE)
1405 error = spa_scan_stop(spa);
1406 else
1407 error = spa_scan(spa, zc->zc_cookie);
1408
1409 spa_close(spa, FTAG);
1410
1411 return (error);
1412 }
1413
1414 static int
1415 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1416 {
1417 spa_t *spa;
1418 int error;
1419
1420 error = spa_open(zc->zc_name, &spa, FTAG);
1421 if (error == 0) {
1422 spa_freeze(spa);
1423 spa_close(spa, FTAG);
1424 }
1425 return (error);
1426 }
1427
1428 static int
1429 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1430 {
1431 spa_t *spa;
1432 int error;
1433
1434 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1435 return (error);
1436
1437 if (zc->zc_cookie < spa_version(spa) ||
1438 !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1439 spa_close(spa, FTAG);
1440 return (EINVAL);
1441 }
1442
1443 spa_upgrade(spa, zc->zc_cookie);
1444 spa_close(spa, FTAG);
1445
1446 return (error);
1447 }
1448
1449 static int
1450 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1451 {
1452 spa_t *spa;
1453 char *hist_buf;
1454 uint64_t size;
1455 int error;
1456
1457 if ((size = zc->zc_history_len) == 0)
1458 return (EINVAL);
1459
1460 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1461 return (error);
1462
1463 if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1464 spa_close(spa, FTAG);
1465 return (ENOTSUP);
1466 }
1467
1468 hist_buf = kmem_alloc(size, KM_SLEEP);
1469 if ((error = spa_history_get(spa, &zc->zc_history_offset,
1470 &zc->zc_history_len, hist_buf)) == 0) {
1471 error = ddi_copyout(hist_buf,
1472 (void *)(uintptr_t)zc->zc_history,
1473 zc->zc_history_len, zc->zc_iflags);
1474 }
1475
1476 spa_close(spa, FTAG);
1477 kmem_free(hist_buf, size);
1478 return (error);
1479 }
1480
1481 static int
1482 zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1483 {
1484 spa_t *spa;
1485 int error;
1486
1487 error = spa_open(zc->zc_name, &spa, FTAG);
1488 if (error == 0) {
1489 error = spa_change_guid(spa);
1490 spa_close(spa, FTAG);
1491 }
1492 return (error);
1493 }
1494
1495 static int
1496 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1497 {
1498 int error;
1499
1500 if (error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value))
1501 return (error);
1502
1503 return (0);
1504 }
1505
1506 /*
1507 * inputs:
1508 * zc_name name of filesystem
1509 * zc_obj object to find
1510 *
1511 * outputs:
1512 * zc_value name of object
1513 */
1514 static int
1515 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1516 {
1517 objset_t *os;
1518 int error;
1519
1520 /* XXX reading from objset not owned */
1521 if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1522 return (error);
1523 if (dmu_objset_type(os) != DMU_OST_ZFS) {
1524 dmu_objset_rele(os, FTAG);
1525 return (EINVAL);
1526 }
1527 error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1528 sizeof (zc->zc_value));
1529 dmu_objset_rele(os, FTAG);
1530
1531 return (error);
1532 }
1533
1534 /*
1535 * inputs:
1536 * zc_name name of filesystem
1537 * zc_obj object to find
1538 *
1539 * outputs:
1540 * zc_stat stats on object
1541 * zc_value path to object
1542 */
1543 static int
1544 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1545 {
1546 objset_t *os;
1547 int error;
1548
1549 /* XXX reading from objset not owned */
1550 if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1551 return (error);
1552 if (dmu_objset_type(os) != DMU_OST_ZFS) {
1553 dmu_objset_rele(os, FTAG);
1554 return (EINVAL);
1555 }
1556 error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1557 sizeof (zc->zc_value));
1558 dmu_objset_rele(os, FTAG);
1559
1560 return (error);
1561 }
1562
1563 static int
1564 zfs_ioc_vdev_add(zfs_cmd_t *zc)
1565 {
1566 spa_t *spa;
1567 int error;
1568 nvlist_t *config, **l2cache, **spares;
1569 uint_t nl2cache = 0, nspares = 0;
1570
1571 error = spa_open(zc->zc_name, &spa, FTAG);
1572 if (error != 0)
1573 return (error);
1574
1575 error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1576 zc->zc_iflags, &config);
1577 (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1578 &l2cache, &nl2cache);
1579
1580 (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1581 &spares, &nspares);
1582
1583 /*
1584 * A root pool with concatenated devices is not supported.
1585 * Thus, can not add a device to a root pool.
1586 *
1587 * Intent log device can not be added to a rootpool because
1588 * during mountroot, zil is replayed, a seperated log device
1589 * can not be accessed during the mountroot time.
1590 *
1591 * l2cache and spare devices are ok to be added to a rootpool.
1592 */
1593 if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1594 nvlist_free(config);
1595 spa_close(spa, FTAG);
1596 return (EDOM);
1597 }
1598
1599 if (error == 0) {
1600 error = spa_vdev_add(spa, config);
1601 nvlist_free(config);
1602 }
1603 spa_close(spa, FTAG);
1604 return (error);
1605 }
1606
1607 /*
1608 * inputs:
1609 * zc_name name of the pool
1610 * zc_nvlist_conf nvlist of devices to remove
1611 * zc_cookie to stop the remove?
1612 */
1613 static int
1614 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1615 {
1616 spa_t *spa;
1617 int error;
1618
1619 error = spa_open(zc->zc_name, &spa, FTAG);
1620 if (error != 0)
1621 return (error);
1622 error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1623 spa_close(spa, FTAG);
1624 return (error);
1625 }
1626
1627 static int
1628 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1629 {
1630 spa_t *spa;
1631 int error;
1632 vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1633
1634 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1635 return (error);
1636 switch (zc->zc_cookie) {
1637 case VDEV_STATE_ONLINE:
1638 error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1639 break;
1640
1641 case VDEV_STATE_OFFLINE:
1642 error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1643 break;
1644
1645 case VDEV_STATE_FAULTED:
1646 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1647 zc->zc_obj != VDEV_AUX_EXTERNAL)
1648 zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1649
1650 error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1651 break;
1652
1653 case VDEV_STATE_DEGRADED:
1654 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1655 zc->zc_obj != VDEV_AUX_EXTERNAL)
1656 zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1657
1658 error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1659 break;
1660
1661 default:
1662 error = EINVAL;
1663 }
1664 zc->zc_cookie = newstate;
1665 spa_close(spa, FTAG);
1666 return (error);
1667 }
1668
1669 static int
1670 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1671 {
1672 spa_t *spa;
1673 int replacing = zc->zc_cookie;
1674 nvlist_t *config;
1675 int error;
1676
1677 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1678 return (error);
1679
1680 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1681 zc->zc_iflags, &config)) == 0) {
1682 error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
1683 nvlist_free(config);
1684 }
1685
1686 spa_close(spa, FTAG);
1687 return (error);
1688 }
1689
1690 static int
1691 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1692 {
1693 spa_t *spa;
1694 int error;
1695
1696 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1697 return (error);
1698
1699 error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
1700
1701 spa_close(spa, FTAG);
1702 return (error);
1703 }
1704
1705 static int
1706 zfs_ioc_vdev_split(zfs_cmd_t *zc)
1707 {
1708 spa_t *spa;
1709 nvlist_t *config, *props = NULL;
1710 int error;
1711 boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
1712
1713 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1714 return (error);
1715
1716 if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1717 zc->zc_iflags, &config)) {
1718 spa_close(spa, FTAG);
1719 return (error);
1720 }
1721
1722 if (zc->zc_nvlist_src_size != 0 && (error =
1723 get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1724 zc->zc_iflags, &props))) {
1725 spa_close(spa, FTAG);
1726 nvlist_free(config);
1727 return (error);
1728 }
1729
1730 error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
1731
1732 spa_close(spa, FTAG);
1733
1734 nvlist_free(config);
1735 nvlist_free(props);
1736
1737 return (error);
1738 }
1739
1740 static int
1741 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
1742 {
1743 spa_t *spa;
1744 char *path = zc->zc_value;
1745 uint64_t guid = zc->zc_guid;
1746 int error;
1747
1748 error = spa_open(zc->zc_name, &spa, FTAG);
1749 if (error != 0)
1750 return (error);
1751
1752 error = spa_vdev_setpath(spa, guid, path);
1753 spa_close(spa, FTAG);
1754 return (error);
1755 }
1756
1757 static int
1758 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
1759 {
1760 spa_t *spa;
1761 char *fru = zc->zc_value;
1762 uint64_t guid = zc->zc_guid;
1763 int error;
1764
1765 error = spa_open(zc->zc_name, &spa, FTAG);
1766 if (error != 0)
1767 return (error);
1768
1769 error = spa_vdev_setfru(spa, guid, fru);
1770 spa_close(spa, FTAG);
1771 return (error);
1772 }
1773
1774 static int
1775 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
1776 {
1777 int error = 0;
1778 nvlist_t *nv;
1779
1780 dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1781
1782 if (zc->zc_nvlist_dst != 0 &&
1783 (error = dsl_prop_get_all(os, &nv)) == 0) {
1784 dmu_objset_stats(os, nv);
1785 /*
1786 * NB: zvol_get_stats() will read the objset contents,
1787 * which we aren't supposed to do with a
1788 * DS_MODE_USER hold, because it could be
1789 * inconsistent. So this is a bit of a workaround...
1790 * XXX reading with out owning
1791 */
1792 if (!zc->zc_objset_stats.dds_inconsistent &&
1793 dmu_objset_type(os) == DMU_OST_ZVOL) {
1794 error = zvol_get_stats(os, nv);
1795 if (error == EIO)
1796 return (error);
1797 VERIFY3S(error, ==, 0);
1798 }
1799 error = put_nvlist(zc, nv);
1800 nvlist_free(nv);
1801 }
1802
1803 return (error);
1804 }
1805
1806 /*
1807 * inputs:
1808 * zc_name name of filesystem
1809 * zc_nvlist_dst_size size of buffer for property nvlist
1810 *
1811 * outputs:
1812 * zc_objset_stats stats
1813 * zc_nvlist_dst property nvlist
1814 * zc_nvlist_dst_size size of property nvlist
1815 */
1816 static int
1817 zfs_ioc_objset_stats(zfs_cmd_t *zc)
1818 {
1819 objset_t *os = NULL;
1820 int error;
1821
1822 if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
1823 return (error);
1824
1825 error = zfs_ioc_objset_stats_impl(zc, os);
1826
1827 dmu_objset_rele(os, FTAG);
1828
1829 return (error);
1830 }
1831
1832 /*
1833 * inputs:
1834 * zc_name name of filesystem
1835 * zc_nvlist_dst_size size of buffer for property nvlist
1836 *
1837 * outputs:
1838 * zc_nvlist_dst received property nvlist
1839 * zc_nvlist_dst_size size of received property nvlist
1840 *
1841 * Gets received properties (distinct from local properties on or after
1842 * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
1843 * local property values.
1844 */
1845 static int
1846 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
1847 {
1848 objset_t *os = NULL;
1849 int error;
1850 nvlist_t *nv;
1851
1852 if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
1853 return (error);
1854
1855 /*
1856 * Without this check, we would return local property values if the
1857 * caller has not already received properties on or after
1858 * SPA_VERSION_RECVD_PROPS.
1859 */
1860 if (!dsl_prop_get_hasrecvd(os)) {
1861 dmu_objset_rele(os, FTAG);
1862 return (ENOTSUP);
1863 }
1864
1865 if (zc->zc_nvlist_dst != 0 &&
1866 (error = dsl_prop_get_received(os, &nv)) == 0) {
1867 error = put_nvlist(zc, nv);
1868 nvlist_free(nv);
1869 }
1870
1871 dmu_objset_rele(os, FTAG);
1872 return (error);
1873 }
1874
1875 static int
1876 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
1877 {
1878 uint64_t value;
1879 int error;
1880
1881 /*
1882 * zfs_get_zplprop() will either find a value or give us
1883 * the default value (if there is one).
1884 */
1885 if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
1886 return (error);
1887 VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
1888 return (0);
1889 }
1890
1891 /*
1892 * inputs:
1893 * zc_name name of filesystem
1894 * zc_nvlist_dst_size size of buffer for zpl property nvlist
1895 *
1896 * outputs:
1897 * zc_nvlist_dst zpl property nvlist
1898 * zc_nvlist_dst_size size of zpl property nvlist
1899 */
1900 static int
1901 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
1902 {
1903 objset_t *os;
1904 int err;
1905
1906 /* XXX reading without owning */
1907 if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
1908 return (err);
1909
1910 dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1911
1912 /*
1913 * NB: nvl_add_zplprop() will read the objset contents,
1914 * which we aren't supposed to do with a DS_MODE_USER
1915 * hold, because it could be inconsistent.
1916 */
1917 if (zc->zc_nvlist_dst != NULL &&
1918 !zc->zc_objset_stats.dds_inconsistent &&
1919 dmu_objset_type(os) == DMU_OST_ZFS) {
1920 nvlist_t *nv;
1921
1922 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1923 if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
1924 (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
1925 (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
1926 (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
1927 err = put_nvlist(zc, nv);
1928 nvlist_free(nv);
1929 } else {
1930 err = ENOENT;
1931 }
1932 dmu_objset_rele(os, FTAG);
1933 return (err);
1934 }
1935
1936 static boolean_t
1937 dataset_name_hidden(const char *name)
1938 {
1939 /*
1940 * Skip over datasets that are not visible in this zone,
1941 * internal datasets (which have a $ in their name), and
1942 * temporary datasets (which have a % in their name).
1943 */
1944 if (strchr(name, '$') != NULL)
1945 return (B_TRUE);
1946 if (strchr(name, '%') != NULL)
1947 return (B_TRUE);
1948 if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
1949 return (B_TRUE);
1950 return (B_FALSE);
1951 }
1952
1953 /*
1954 * inputs:
1955 * zc_name name of filesystem
1956 * zc_cookie zap cursor
1957 * zc_nvlist_dst_size size of buffer for property nvlist
1958 *
1959 * outputs:
1960 * zc_name name of next filesystem
1961 * zc_cookie zap cursor
1962 * zc_objset_stats stats
1963 * zc_nvlist_dst property nvlist
1964 * zc_nvlist_dst_size size of property nvlist
1965 */
1966 static int
1967 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
1968 {
1969 objset_t *os;
1970 int error;
1971 char *p;
1972 size_t orig_len = strlen(zc->zc_name);
1973
1974 top:
1975 if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
1976 if (error == ENOENT)
1977 error = ESRCH;
1978 return (error);
1979 }
1980
1981 p = strrchr(zc->zc_name, '/');
1982 if (p == NULL || p[1] != '\0')
1983 (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
1984 p = zc->zc_name + strlen(zc->zc_name);
1985
1986 /*
1987 * Pre-fetch the datasets. dmu_objset_prefetch() always returns 0
1988 * but is not declared void because its called by dmu_objset_find().
1989 */
1990 if (zc->zc_cookie == 0) {
1991 uint64_t cookie = 0;
1992 int len = sizeof (zc->zc_name) - (p - zc->zc_name);
1993
1994 while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) {
1995 if (!dataset_name_hidden(zc->zc_name))
1996 (void) dmu_objset_prefetch(zc->zc_name, NULL);
1997 }
1998 }
1999
2000 do {
2001 error = dmu_dir_list_next(os,
2002 sizeof (zc->zc_name) - (p - zc->zc_name), p,
2003 NULL, &zc->zc_cookie);
2004 if (error == ENOENT)
2005 error = ESRCH;
2006 } while (error == 0 && dataset_name_hidden(zc->zc_name));
2007 dmu_objset_rele(os, FTAG);
2008
2009 /*
2010 * If it's an internal dataset (ie. with a '$' in its name),
2011 * don't try to get stats for it, otherwise we'll return ENOENT.
2012 */
2013 if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2014 error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2015 if (error == ENOENT) {
2016 /* We lost a race with destroy, get the next one. */
2017 zc->zc_name[orig_len] = '\0';
2018 goto top;
2019 }
2020 }
2021 return (error);
2022 }
2023
2024 /*
2025 * inputs:
2026 * zc_name name of filesystem
2027 * zc_cookie zap cursor
2028 * zc_nvlist_dst_size size of buffer for property nvlist
2029 *
2030 * outputs:
2031 * zc_name name of next snapshot
2032 * zc_objset_stats stats
2033 * zc_nvlist_dst property nvlist
2034 * zc_nvlist_dst_size size of property nvlist
2035 */
2036 static int
2037 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2038 {
2039 objset_t *os;
2040 int error;
2041
2042 top:
2043 if (zc->zc_cookie == 0)
2044 (void) dmu_objset_find(zc->zc_name, dmu_objset_prefetch,
2045 NULL, DS_FIND_SNAPSHOTS);
2046
2047 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2048 if (error)
2049 return (error == ENOENT ? ESRCH : error);
2050
2051 /*
2052 * A dataset name of maximum length cannot have any snapshots,
2053 * so exit immediately.
2054 */
2055 if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
2056 dmu_objset_rele(os, FTAG);
2057 return (ESRCH);
2058 }
2059
2060 error = dmu_snapshot_list_next(os,
2061 sizeof (zc->zc_name) - strlen(zc->zc_name),
2062 zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2063 NULL);
2064
2065 if (error == 0) {
2066 dsl_dataset_t *ds;
2067 dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2068
2069 /*
2070 * Since we probably don't have a hold on this snapshot,
2071 * it's possible that the objsetid could have been destroyed
2072 * and reused for a new objset. It's OK if this happens during
2073 * a zfs send operation, since the new createtxg will be
2074 * beyond the range we're interested in.
2075 */
2076 rw_enter(&dp->dp_config_rwlock, RW_READER);
2077 error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2078 rw_exit(&dp->dp_config_rwlock);
2079 if (error) {
2080 if (error == ENOENT) {
2081 /* Racing with destroy, get the next one. */
2082 *strchr(zc->zc_name, '@') = '\0';
2083 dmu_objset_rele(os, FTAG);
2084 goto top;
2085 }
2086 } else {
2087 objset_t *ossnap;
2088
2089 error = dmu_objset_from_ds(ds, &ossnap);
2090 if (error == 0)
2091 error = zfs_ioc_objset_stats_impl(zc, ossnap);
2092 dsl_dataset_rele(ds, FTAG);
2093 }
2094 } else if (error == ENOENT) {
2095 error = ESRCH;
2096 }
2097
2098 dmu_objset_rele(os, FTAG);
2099 /* if we failed, undo the @ that we tacked on to zc_name */
2100 if (error)
2101 *strchr(zc->zc_name, '@') = '\0';
2102 return (error);
2103 }
2104
2105 static int
2106 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2107 {
2108 const char *propname = nvpair_name(pair);
2109 uint64_t *valary;
2110 unsigned int vallen;
2111 const char *domain;
2112 char *dash;
2113 zfs_userquota_prop_t type;
2114 uint64_t rid;
2115 uint64_t quota;
2116 zfsvfs_t *zfsvfs;
2117 int err;
2118
2119 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2120 nvlist_t *attrs;
2121 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2122 if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2123 &pair) != 0)
2124 return (EINVAL);
2125 }
2126
2127 /*
2128 * A correctly constructed propname is encoded as
2129 * userquota@<rid>-<domain>.
2130 */
2131 if ((dash = strchr(propname, '-')) == NULL ||
2132 nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2133 vallen != 3)
2134 return (EINVAL);
2135
2136 domain = dash + 1;
2137 type = valary[0];
2138 rid = valary[1];
2139 quota = valary[2];
2140
2141 err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2142 if (err == 0) {
2143 err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2144 zfsvfs_rele(zfsvfs, FTAG);
2145 }
2146
2147 return (err);
2148 }
2149
2150 /*
2151 * If the named property is one that has a special function to set its value,
2152 * return 0 on success and a positive error code on failure; otherwise if it is
2153 * not one of the special properties handled by this function, return -1.
2154 *
2155 * XXX: It would be better for callers of the property interface if we handled
2156 * these special cases in dsl_prop.c (in the dsl layer).
2157 */
2158 static int
2159 zfs_prop_set_special(const char *dsname, zprop_source_t source,
2160 nvpair_t *pair)
2161 {
2162 const char *propname = nvpair_name(pair);
2163 zfs_prop_t prop = zfs_name_to_prop(propname);
2164 uint64_t intval;
2165 int err;
2166
2167 if (prop == ZPROP_INVAL) {
2168 if (zfs_prop_userquota(propname))
2169 return (zfs_prop_set_userquota(dsname, pair));
2170 return (-1);
2171 }
2172
2173 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2174 nvlist_t *attrs;
2175 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2176 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2177 &pair) == 0);
2178 }
2179
2180 if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
2181 return (-1);
2182
2183 VERIFY(0 == nvpair_value_uint64(pair, &intval));
2184
2185 switch (prop) {
2186 case ZFS_PROP_QUOTA:
2187 err = dsl_dir_set_quota(dsname, source, intval);
2188 break;
2189 case ZFS_PROP_REFQUOTA:
2190 err = dsl_dataset_set_quota(dsname, source, intval);
2191 break;
2192 case ZFS_PROP_RESERVATION:
2193 err = dsl_dir_set_reservation(dsname, source, intval);
2194 break;
2195 case ZFS_PROP_REFRESERVATION:
2196 err = dsl_dataset_set_reservation(dsname, source, intval);
2197 break;
2198 case ZFS_PROP_VOLSIZE:
2199 err = zvol_set_volsize(dsname, ddi_driver_major(zfs_dip),
2200 intval);
2201 break;
2202 case ZFS_PROP_VERSION:
2203 {
2204 zfsvfs_t *zfsvfs;
2205
2206 if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2207 break;
2208
2209 err = zfs_set_version(zfsvfs, intval);
2210 zfsvfs_rele(zfsvfs, FTAG);
2211
2212 if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2213 zfs_cmd_t *zc;
2214
2215 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2216 (void) strcpy(zc->zc_name, dsname);
2217 (void) zfs_ioc_userspace_upgrade(zc);
2218 kmem_free(zc, sizeof (zfs_cmd_t));
2219 }
2220 break;
2221 }
2222
2223 default:
2224 err = -1;
2225 }
2226
2227 return (err);
2228 }
2229
2230 /*
2231 * This function is best effort. If it fails to set any of the given properties,
2232 * it continues to set as many as it can and returns the first error
2233 * encountered. If the caller provides a non-NULL errlist, it also gives the
2234 * complete list of names of all the properties it failed to set along with the
2235 * corresponding error numbers. The caller is responsible for freeing the
2236 * returned errlist.
2237 *
2238 * If every property is set successfully, zero is returned and the list pointed
2239 * at by errlist is NULL.
2240 */
2241 int
2242 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2243 nvlist_t **errlist)
2244 {
2245 nvpair_t *pair;
2246 nvpair_t *propval;
2247 int rv = 0;
2248 uint64_t intval;
2249 char *strval;
2250 nvlist_t *genericnvl;
2251 nvlist_t *errors;
2252 nvlist_t *retrynvl;
2253
2254 VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2255 VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2256 VERIFY(nvlist_alloc(&retrynvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2257
2258 retry:
2259 pair = NULL;
2260 while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2261 const char *propname = nvpair_name(pair);
2262 zfs_prop_t prop = zfs_name_to_prop(propname);
2263 int err = 0;
2264
2265 /* decode the property value */
2266 propval = pair;
2267 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2268 nvlist_t *attrs;
2269 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2270 if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2271 &propval) != 0)
2272 err = EINVAL;
2273 }
2274
2275 /* Validate value type */
2276 if (err == 0 && prop == ZPROP_INVAL) {
2277 if (zfs_prop_user(propname)) {
2278 if (nvpair_type(propval) != DATA_TYPE_STRING)
2279 err = EINVAL;
2280 } else if (zfs_prop_userquota(propname)) {
2281 if (nvpair_type(propval) !=
2282 DATA_TYPE_UINT64_ARRAY)
2283 err = EINVAL;
2284 } else {
2285 err = EINVAL;
2286 }
2287 } else if (err == 0) {
2288 if (nvpair_type(propval) == DATA_TYPE_STRING) {
2289 if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2290 err = EINVAL;
2291 } else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2292 const char *unused;
2293
2294 VERIFY(nvpair_value_uint64(propval,
2295 &intval) == 0);
2296
2297 switch (zfs_prop_get_type(prop)) {
2298 case PROP_TYPE_NUMBER:
2299 break;
2300 case PROP_TYPE_STRING:
2301 err = EINVAL;
2302 break;
2303 case PROP_TYPE_INDEX:
2304 if (zfs_prop_index_to_string(prop,
2305 intval, &unused) != 0)
2306 err = EINVAL;
2307 break;
2308 default:
2309 cmn_err(CE_PANIC,
2310 "unknown property type");
2311 }
2312 } else {
2313 err = EINVAL;
2314 }
2315 }
2316
2317 /* Validate permissions */
2318 if (err == 0)
2319 err = zfs_check_settable(dsname, pair, CRED());
2320
2321 if (err == 0) {
2322 err = zfs_prop_set_special(dsname, source, pair);
2323 if (err == -1) {
2324 /*
2325 * For better performance we build up a list of
2326 * properties to set in a single transaction.
2327 */
2328 err = nvlist_add_nvpair(genericnvl, pair);
2329 } else if (err != 0 && nvl != retrynvl) {
2330 /*
2331 * This may be a spurious error caused by
2332 * receiving quota and reservation out of order.
2333 * Try again in a second pass.
2334 */
2335 err = nvlist_add_nvpair(retrynvl, pair);
2336 }
2337 }
2338
2339 if (err != 0)
2340 VERIFY(nvlist_add_int32(errors, propname, err) == 0);
2341 }
2342
2343 if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2344 nvl = retrynvl;
2345 goto retry;
2346 }
2347
2348 if (!nvlist_empty(genericnvl) &&
2349 dsl_props_set(dsname, source, genericnvl) != 0) {
2350 /*
2351 * If this fails, we still want to set as many properties as we
2352 * can, so try setting them individually.
2353 */
2354 pair = NULL;
2355 while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2356 const char *propname = nvpair_name(pair);
2357 int err = 0;
2358
2359 propval = pair;
2360 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2361 nvlist_t *attrs;
2362 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2363 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2364 &propval) == 0);
2365 }
2366
2367 if (nvpair_type(propval) == DATA_TYPE_STRING) {
2368 VERIFY(nvpair_value_string(propval,
2369 &strval) == 0);
2370 err = dsl_prop_set(dsname, propname, source, 1,
2371 strlen(strval) + 1, strval);
2372 } else {
2373 VERIFY(nvpair_value_uint64(propval,
2374 &intval) == 0);
2375 err = dsl_prop_set(dsname, propname, source, 8,
2376 1, &intval);
2377 }
2378
2379 if (err != 0) {
2380 VERIFY(nvlist_add_int32(errors, propname,
2381 err) == 0);
2382 }
2383 }
2384 }
2385 nvlist_free(genericnvl);
2386 nvlist_free(retrynvl);
2387
2388 if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
2389 nvlist_free(errors);
2390 errors = NULL;
2391 } else {
2392 VERIFY(nvpair_value_int32(pair, &rv) == 0);
2393 }
2394
2395 if (errlist == NULL)
2396 nvlist_free(errors);
2397 else
2398 *errlist = errors;
2399
2400 return (rv);
2401 }
2402
2403 /*
2404 * Check that all the properties are valid user properties.
2405 */
2406 static int
2407 zfs_check_userprops(char *fsname, nvlist_t *nvl)
2408 {
2409 nvpair_t *pair = NULL;
2410 int error = 0;
2411
2412 while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2413 const char *propname = nvpair_name(pair);
2414 char *valstr;
2415
2416 if (!zfs_prop_user(propname) ||
2417 nvpair_type(pair) != DATA_TYPE_STRING)
2418 return (EINVAL);
2419
2420 if (error = zfs_secpolicy_write_perms(fsname,
2421 ZFS_DELEG_PERM_USERPROP, CRED()))
2422 return (error);
2423
2424 if (strlen(propname) >= ZAP_MAXNAMELEN)
2425 return (ENAMETOOLONG);
2426
2427 VERIFY(nvpair_value_string(pair, &valstr) == 0);
2428 if (strlen(valstr) >= ZAP_MAXVALUELEN)
2429 return (E2BIG);
2430 }
2431 return (0);
2432 }
2433
2434 static void
2435 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2436 {
2437 nvpair_t *pair;
2438
2439 VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2440
2441 pair = NULL;
2442 while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2443 if (nvlist_exists(skipped, nvpair_name(pair)))
2444 continue;
2445
2446 VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2447 }
2448 }
2449
2450 static int
2451 clear_received_props(objset_t *os, const char *fs, nvlist_t *props,
2452 nvlist_t *skipped)
2453 {
2454 int err = 0;
2455 nvlist_t *cleared_props = NULL;
2456 props_skip(props, skipped, &cleared_props);
2457 if (!nvlist_empty(cleared_props)) {
2458 /*
2459 * Acts on local properties until the dataset has received
2460 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2461 */
2462 zprop_source_t flags = (ZPROP_SRC_NONE |
2463 (dsl_prop_get_hasrecvd(os) ? ZPROP_SRC_RECEIVED : 0));
2464 err = zfs_set_prop_nvlist(fs, flags, cleared_props, NULL);
2465 }
2466 nvlist_free(cleared_props);
2467 return (err);
2468 }
2469
2470 /*
2471 * inputs:
2472 * zc_name name of filesystem
2473 * zc_value name of property to set
2474 * zc_nvlist_src{_size} nvlist of properties to apply
2475 * zc_cookie received properties flag
2476 *
2477 * outputs:
2478 * zc_nvlist_dst{_size} error for each unapplied received property
2479 */
2480 static int
2481 zfs_ioc_set_prop(zfs_cmd_t *zc)
2482 {
2483 nvlist_t *nvl;
2484 boolean_t received = zc->zc_cookie;
2485 zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2486 ZPROP_SRC_LOCAL);
2487 nvlist_t *errors = NULL;
2488 int error;
2489
2490 if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2491 zc->zc_iflags, &nvl)) != 0)
2492 return (error);
2493
2494 if (received) {
2495 nvlist_t *origprops;
2496 objset_t *os;
2497
2498 if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) {
2499 if (dsl_prop_get_received(os, &origprops) == 0) {
2500 (void) clear_received_props(os,
2501 zc->zc_name, origprops, nvl);
2502 nvlist_free(origprops);
2503 }
2504
2505 dsl_prop_set_hasrecvd(os);
2506 dmu_objset_rele(os, FTAG);
2507 }
2508 }
2509
2510 error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, &errors);
2511
2512 if (zc->zc_nvlist_dst != NULL && errors != NULL) {
2513 (void) put_nvlist(zc, errors);
2514 }
2515
2516 nvlist_free(errors);
2517 nvlist_free(nvl);
2518 return (error);
2519 }
2520
2521 /*
2522 * inputs:
2523 * zc_name name of filesystem
2524 * zc_value name of property to inherit
2525 * zc_cookie revert to received value if TRUE
2526 *
2527 * outputs: none
2528 */
2529 static int
2530 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2531 {
2532 const char *propname = zc->zc_value;
2533 zfs_prop_t prop = zfs_name_to_prop(propname);
2534 boolean_t received = zc->zc_cookie;
2535 zprop_source_t source = (received
2536 ? ZPROP_SRC_NONE /* revert to received value, if any */
2537 : ZPROP_SRC_INHERITED); /* explicitly inherit */
2538
2539 if (received) {
2540 nvlist_t *dummy;
2541 nvpair_t *pair;
2542 zprop_type_t type;
2543 int err;
2544
2545 /*
2546 * zfs_prop_set_special() expects properties in the form of an
2547 * nvpair with type info.
2548 */
2549 if (prop == ZPROP_INVAL) {
2550 if (!zfs_prop_user(propname))
2551 return (EINVAL);
2552
2553 type = PROP_TYPE_STRING;
2554 } else if (prop == ZFS_PROP_VOLSIZE ||
2555 prop == ZFS_PROP_VERSION) {
2556 return (EINVAL);
2557 } else {
2558 type = zfs_prop_get_type(prop);
2559 }
2560
2561 VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2562
2563 switch (type) {
2564 case PROP_TYPE_STRING:
2565 VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2566 break;
2567 case PROP_TYPE_NUMBER:
2568 case PROP_TYPE_INDEX:
2569 VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2570 break;
2571 default:
2572 nvlist_free(dummy);
2573 return (EINVAL);
2574 }
2575
2576 pair = nvlist_next_nvpair(dummy, NULL);
2577 err = zfs_prop_set_special(zc->zc_name, source, pair);
2578 nvlist_free(dummy);
2579 if (err != -1)
2580 return (err); /* special property already handled */
2581 } else {
2582 /*
2583 * Only check this in the non-received case. We want to allow
2584 * 'inherit -S' to revert non-inheritable properties like quota
2585 * and reservation to the received or default values even though
2586 * they are not considered inheritable.
2587 */
2588 if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2589 return (EINVAL);
2590 }
2591
2592 /* the property name has been validated by zfs_secpolicy_inherit() */
2593 return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL));
2594 }
2595
2596 static int
2597 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2598 {
2599 nvlist_t *props;
2600 spa_t *spa;
2601 int error;
2602 nvpair_t *pair;
2603
2604 if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2605 zc->zc_iflags, &props))
2606 return (error);
2607
2608 /*
2609 * If the only property is the configfile, then just do a spa_lookup()
2610 * to handle the faulted case.
2611 */
2612 pair = nvlist_next_nvpair(props, NULL);
2613 if (pair != NULL && strcmp(nvpair_name(pair),
2614 zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2615 nvlist_next_nvpair(props, pair) == NULL) {
2616 mutex_enter(&spa_namespace_lock);
2617 if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2618 spa_configfile_set(spa, props, B_FALSE);
2619 spa_config_sync(spa, B_FALSE, B_TRUE);
2620 }
2621 mutex_exit(&spa_namespace_lock);
2622 if (spa != NULL) {
2623 nvlist_free(props);
2624 return (0);
2625 }
2626 }
2627
2628 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2629 nvlist_free(props);
2630 return (error);
2631 }
2632
2633 error = spa_prop_set(spa, props);
2634
2635 nvlist_free(props);
2636 spa_close(spa, FTAG);
2637
2638 return (error);
2639 }
2640
2641 static int
2642 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2643 {
2644 spa_t *spa;
2645 int error;
2646 nvlist_t *nvp = NULL;
2647
2648 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2649 /*
2650 * If the pool is faulted, there may be properties we can still
2651 * get (such as altroot and cachefile), so attempt to get them
2652 * anyway.
2653 */
2654 mutex_enter(&spa_namespace_lock);
2655 if ((spa = spa_lookup(zc->zc_name)) != NULL)
2656 error = spa_prop_get(spa, &nvp);
2657 mutex_exit(&spa_namespace_lock);
2658 } else {
2659 error = spa_prop_get(spa, &nvp);
2660 spa_close(spa, FTAG);
2661 }
2662
2663 if (error == 0 && zc->zc_nvlist_dst != NULL)
2664 error = put_nvlist(zc, nvp);
2665 else
2666 error = EFAULT;
2667
2668 nvlist_free(nvp);
2669 return (error);
2670 }
2671
2672 /*
2673 * inputs:
2674 * zc_name name of filesystem
2675 * zc_nvlist_src{_size} nvlist of delegated permissions
2676 * zc_perm_action allow/unallow flag
2677 *
2678 * outputs: none
2679 */
2680 static int
2681 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
2682 {
2683 int error;
2684 nvlist_t *fsaclnv = NULL;
2685
2686 if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2687 zc->zc_iflags, &fsaclnv)) != 0)
2688 return (error);
2689
2690 /*
2691 * Verify nvlist is constructed correctly
2692 */
2693 if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
2694 nvlist_free(fsaclnv);
2695 return (EINVAL);
2696 }
2697
2698 /*
2699 * If we don't have PRIV_SYS_MOUNT, then validate
2700 * that user is allowed to hand out each permission in
2701 * the nvlist(s)
2702 */
2703
2704 error = secpolicy_zfs(CRED());
2705 if (error) {
2706 if (zc->zc_perm_action == B_FALSE) {
2707 error = dsl_deleg_can_allow(zc->zc_name,
2708 fsaclnv, CRED());
2709 } else {
2710 error = dsl_deleg_can_unallow(zc->zc_name,
2711 fsaclnv, CRED());
2712 }
2713 }
2714
2715 if (error == 0)
2716 error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
2717
2718 nvlist_free(fsaclnv);
2719 return (error);
2720 }
2721
2722 /*
2723 * inputs:
2724 * zc_name name of filesystem
2725 *
2726 * outputs:
2727 * zc_nvlist_src{_size} nvlist of delegated permissions
2728 */
2729 static int
2730 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
2731 {
2732 nvlist_t *nvp;
2733 int error;
2734
2735 if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
2736 error = put_nvlist(zc, nvp);
2737 nvlist_free(nvp);
2738 }
2739
2740 return (error);
2741 }
2742
2743 /*
2744 * Search the vfs list for a specified resource. Returns a pointer to it
2745 * or NULL if no suitable entry is found. The caller of this routine
2746 * is responsible for releasing the returned vfs pointer.
2747 */
2748 static vfs_t *
2749 zfs_get_vfs(const char *resource)
2750 {
2751 struct vfs *vfsp;
2752 struct vfs *vfs_found = NULL;
2753
2754 vfs_list_read_lock();
2755 vfsp = rootvfs;
2756 do {
2757 if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
2758 VFS_HOLD(vfsp);
2759 vfs_found = vfsp;
2760 break;
2761 }
2762 vfsp = vfsp->vfs_next;
2763 } while (vfsp != rootvfs);
2764 vfs_list_unlock();
2765 return (vfs_found);
2766 }
2767
2768 /* ARGSUSED */
2769 static void
2770 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
2771 {
2772 zfs_creat_t *zct = arg;
2773
2774 zfs_create_fs(os, cr, zct->zct_zplprops, tx);
2775 }
2776
2777 #define ZFS_PROP_UNDEFINED ((uint64_t)-1)
2778
2779 /*
2780 * inputs:
2781 * createprops list of properties requested by creator
2782 * default_zplver zpl version to use if unspecified in createprops
2783 * fuids_ok fuids allowed in this version of the spa?
2784 * os parent objset pointer (NULL if root fs)
2785 *
2786 * outputs:
2787 * zplprops values for the zplprops we attach to the master node object
2788 * is_ci true if requested file system will be purely case-insensitive
2789 *
2790 * Determine the settings for utf8only, normalization and
2791 * casesensitivity. Specific values may have been requested by the
2792 * creator and/or we can inherit values from the parent dataset. If
2793 * the file system is of too early a vintage, a creator can not
2794 * request settings for these properties, even if the requested
2795 * setting is the default value. We don't actually want to create dsl
2796 * properties for these, so remove them from the source nvlist after
2797 * processing.
2798 */
2799 static int
2800 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
2801 boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
2802 nvlist_t *zplprops, boolean_t *is_ci)
2803 {
2804 uint64_t sense = ZFS_PROP_UNDEFINED;
2805 uint64_t norm = ZFS_PROP_UNDEFINED;
2806 uint64_t u8 = ZFS_PROP_UNDEFINED;
2807
2808 ASSERT(zplprops != NULL);
2809
2810 /*
2811 * Pull out creator prop choices, if any.
2812 */
2813 if (createprops) {
2814 (void) nvlist_lookup_uint64(createprops,
2815 zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
2816 (void) nvlist_lookup_uint64(createprops,
2817 zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
2818 (void) nvlist_remove_all(createprops,
2819 zfs_prop_to_name(ZFS_PROP_NORMALIZE));
2820 (void) nvlist_lookup_uint64(createprops,
2821 zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
2822 (void) nvlist_remove_all(createprops,
2823 zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
2824 (void) nvlist_lookup_uint64(createprops,
2825 zfs_prop_to_name(ZFS_PROP_CASE), &sense);
2826 (void) nvlist_remove_all(createprops,
2827 zfs_prop_to_name(ZFS_PROP_CASE));
2828 }
2829
2830 /*
2831 * If the zpl version requested is whacky or the file system
2832 * or pool is version is too "young" to support normalization
2833 * and the creator tried to set a value for one of the props,
2834 * error out.
2835 */
2836 if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
2837 (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
2838 (zplver >= ZPL_VERSION_SA && !sa_ok) ||
2839 (zplver < ZPL_VERSION_NORMALIZATION &&
2840 (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
2841 sense != ZFS_PROP_UNDEFINED)))
2842 return (ENOTSUP);
2843
2844 /*
2845 * Put the version in the zplprops
2846 */
2847 VERIFY(nvlist_add_uint64(zplprops,
2848 zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
2849
2850 if (norm == ZFS_PROP_UNDEFINED)
2851 VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
2852 VERIFY(nvlist_add_uint64(zplprops,
2853 zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
2854
2855 /*
2856 * If we're normalizing, names must always be valid UTF-8 strings.
2857 */
2858 if (norm)
2859 u8 = 1;
2860 if (u8 == ZFS_PROP_UNDEFINED)
2861 VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
2862 VERIFY(nvlist_add_uint64(zplprops,
2863 zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
2864
2865 if (sense == ZFS_PROP_UNDEFINED)
2866 VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
2867 VERIFY(nvlist_add_uint64(zplprops,
2868 zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
2869
2870 if (is_ci)
2871 *is_ci = (sense == ZFS_CASE_INSENSITIVE);
2872
2873 return (0);
2874 }
2875
2876 static int
2877 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
2878 nvlist_t *zplprops, boolean_t *is_ci)
2879 {
2880 boolean_t fuids_ok, sa_ok;
2881 uint64_t zplver = ZPL_VERSION;
2882 objset_t *os = NULL;
2883 char parentname[MAXNAMELEN];
2884 char *cp;
2885 spa_t *spa;
2886 uint64_t spa_vers;
2887 int error;
2888
2889 (void) strlcpy(parentname, dataset, sizeof (parentname));
2890 cp = strrchr(parentname, '/');
2891 ASSERT(cp != NULL);
2892 cp[0] = '\0';
2893
2894 if ((error = spa_open(dataset, &spa, FTAG)) != 0)
2895 return (error);
2896
2897 spa_vers = spa_version(spa);
2898 spa_close(spa, FTAG);
2899
2900 zplver = zfs_zpl_version_map(spa_vers);
2901 fuids_ok = (zplver >= ZPL_VERSION_FUID);
2902 sa_ok = (zplver >= ZPL_VERSION_SA);
2903
2904 /*
2905 * Open parent object set so we can inherit zplprop values.
2906 */
2907 if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
2908 return (error);
2909
2910 error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
2911 zplprops, is_ci);
2912 dmu_objset_rele(os, FTAG);
2913 return (error);
2914 }
2915
2916 static int
2917 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
2918 nvlist_t *zplprops, boolean_t *is_ci)
2919 {
2920 boolean_t fuids_ok;
2921 boolean_t sa_ok;
2922 uint64_t zplver = ZPL_VERSION;
2923 int error;
2924
2925 zplver = zfs_zpl_version_map(spa_vers);
2926 fuids_ok = (zplver >= ZPL_VERSION_FUID);
2927 sa_ok = (zplver >= ZPL_VERSION_SA);
2928
2929 error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
2930 createprops, zplprops, is_ci);
2931 return (error);
2932 }
2933
2934 /*
2935 * inputs:
2936 * zc_objset_type type of objset to create (fs vs zvol)
2937 * zc_name name of new objset
2938 * zc_value name of snapshot to clone from (may be empty)
2939 * zc_nvlist_src{_size} nvlist of properties to apply
2940 *
2941 * outputs: none
2942 */
2943 static int
2944 zfs_ioc_create(zfs_cmd_t *zc)
2945 {
2946 objset_t *clone;
2947 int error = 0;
2948 zfs_creat_t zct;
2949 nvlist_t *nvprops = NULL;
2950 void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
2951 dmu_objset_type_t type = zc->zc_objset_type;
2952
2953 switch (type) {
2954
2955 case DMU_OST_ZFS:
2956 cbfunc = zfs_create_cb;
2957 break;
2958
2959 case DMU_OST_ZVOL:
2960 cbfunc = zvol_create_cb;
2961 break;
2962
2963 default:
2964 cbfunc = NULL;
2965 break;
2966 }
2967 if (strchr(zc->zc_name, '@') ||
2968 strchr(zc->zc_name, '%'))
2969 return (EINVAL);
2970
2971 if (zc->zc_nvlist_src != NULL &&
2972 (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2973 zc->zc_iflags, &nvprops)) != 0)
2974 return (error);
2975
2976 zct.zct_zplprops = NULL;
2977 zct.zct_props = nvprops;
2978
2979 if (zc->zc_value[0] != '\0') {
2980 /*
2981 * We're creating a clone of an existing snapshot.
2982 */
2983 zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
2984 if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) {
2985 nvlist_free(nvprops);
2986 return (EINVAL);
2987 }
2988
2989 error = dmu_objset_hold(zc->zc_value, FTAG, &clone);
2990 if (error) {
2991 nvlist_free(nvprops);
2992 return (error);
2993 }
2994
2995 error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0);
2996 dmu_objset_rele(clone, FTAG);
2997 if (error) {
2998 nvlist_free(nvprops);
2999 return (error);
3000 }
3001 } else {
3002 boolean_t is_insensitive = B_FALSE;
3003
3004 if (cbfunc == NULL) {
3005 nvlist_free(nvprops);
3006 return (EINVAL);
3007 }
3008
3009 if (type == DMU_OST_ZVOL) {
3010 uint64_t volsize, volblocksize;
3011
3012 if (nvprops == NULL ||
3013 nvlist_lookup_uint64(nvprops,
3014 zfs_prop_to_name(ZFS_PROP_VOLSIZE),
3015 &volsize) != 0) {
3016 nvlist_free(nvprops);
3017 return (EINVAL);
3018 }
3019
3020 if ((error = nvlist_lookup_uint64(nvprops,
3021 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3022 &volblocksize)) != 0 && error != ENOENT) {
3023 nvlist_free(nvprops);
3024 return (EINVAL);
3025 }
3026
3027 if (error != 0)
3028 volblocksize = zfs_prop_default_numeric(
3029 ZFS_PROP_VOLBLOCKSIZE);
3030
3031 if ((error = zvol_check_volblocksize(
3032 volblocksize)) != 0 ||
3033 (error = zvol_check_volsize(volsize,
3034 volblocksize)) != 0) {
3035 nvlist_free(nvprops);
3036 return (error);
3037 }
3038 } else if (type == DMU_OST_ZFS) {
3039 int error;
3040
3041 /*
3042 * We have to have normalization and
3043 * case-folding flags correct when we do the
3044 * file system creation, so go figure them out
3045 * now.
3046 */
3047 VERIFY(nvlist_alloc(&zct.zct_zplprops,
3048 NV_UNIQUE_NAME, KM_SLEEP) == 0);
3049 error = zfs_fill_zplprops(zc->zc_name, nvprops,
3050 zct.zct_zplprops, &is_insensitive);
3051 if (error != 0) {
3052 nvlist_free(nvprops);
3053 nvlist_free(zct.zct_zplprops);
3054 return (error);
3055 }
3056 }
3057 error = dmu_objset_create(zc->zc_name, type,
3058 is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
3059 nvlist_free(zct.zct_zplprops);
3060 }
3061
3062 /*
3063 * It would be nice to do this atomically.
3064 */
3065 if (error == 0) {
3066 error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL,
3067 nvprops, NULL);
3068 if (error != 0)
3069 (void) dmu_objset_destroy(zc->zc_name, B_FALSE);
3070 }
3071 nvlist_free(nvprops);
3072 return (error);
3073 }
3074
3075 /*
3076 * inputs:
3077 * zc_name name of filesystem
3078 * zc_value short name of snapshot
3079 * zc_cookie recursive flag
3080 * zc_nvlist_src[_size] property list
3081 *
3082 * outputs:
3083 * zc_value short snapname (i.e. part after the '@')
3084 */
3085 static int
3086 zfs_ioc_snapshot(zfs_cmd_t *zc)
3087 {
3088 nvlist_t *nvprops = NULL;
3089 int error;
3090 boolean_t recursive = zc->zc_cookie;
3091
3092 if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
3093 return (EINVAL);
3094
3095 if (zc->zc_nvlist_src != NULL &&
3096 (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3097 zc->zc_iflags, &nvprops)) != 0)
3098 return (error);
3099
3100 error = zfs_check_userprops(zc->zc_name, nvprops);
3101 if (error)
3102 goto out;
3103
3104 if (!nvlist_empty(nvprops) &&
3105 zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) {
3106 error = ENOTSUP;
3107 goto out;
3108 }
3109
3110 error = dmu_objset_snapshot(zc->zc_name, zc->zc_value, NULL,
3111 nvprops, recursive, B_FALSE, -1);
3112
3113 out:
3114 nvlist_free(nvprops);
3115 return (error);
3116 }
3117
3118 int
3119 zfs_unmount_snap(const char *name, void *arg)
3120 {
3121 vfs_t *vfsp = NULL;
3122
3123 if (arg) {
3124 char *snapname = arg;
3125 char *fullname = kmem_asprintf("%s@%s", name, snapname);
3126 vfsp = zfs_get_vfs(fullname);
3127 strfree(fullname);
3128 } else if (strchr(name, '@')) {
3129 vfsp = zfs_get_vfs(name);
3130 }
3131
3132 if (vfsp) {
3133 /*
3134 * Always force the unmount for snapshots.
3135 */
3136 int flag = MS_FORCE;
3137 int err;
3138
3139 if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
3140 VFS_RELE(vfsp);
3141 return (err);
3142 }
3143 VFS_RELE(vfsp);
3144 if ((err = dounmount(vfsp, flag, kcred)) != 0)
3145 return (err);
3146 }
3147 return (0);
3148 }
3149
3150 /*
3151 * inputs:
3152 * zc_name name of filesystem, snaps must be under it
3153 * zc_nvlist_src[_size] full names of snapshots to destroy
3154 * zc_defer_destroy mark for deferred destroy
3155 *
3156 * outputs:
3157 * zc_name on failure, name of failed snapshot
3158 */
3159 static int
3160 zfs_ioc_destroy_snaps_nvl(zfs_cmd_t *zc)
3161 {
3162 int err, len;
3163 nvlist_t *nvl;
3164 nvpair_t *pair;
3165
3166 if ((err = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3167 zc->zc_iflags, &nvl)) != 0)
3168 return (err);
3169
3170 len = strlen(zc->zc_name);
3171 for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL;
3172 pair = nvlist_next_nvpair(nvl, pair)) {
3173 const char *name = nvpair_name(pair);
3174 /*
3175 * The snap name must be underneath the zc_name. This ensures
3176 * that our permission checks were legitimate.
3177 */
3178 if (strncmp(zc->zc_name, name, len) != 0 ||
3179 (name[len] != '@' && name[len] != '/')) {
3180 nvlist_free(nvl);
3181 return (EINVAL);
3182 }
3183
3184 (void) zfs_unmount_snap(name, NULL);
3185 }
3186
3187 err = dmu_snapshots_destroy_nvl(nvl, zc->zc_defer_destroy,
3188 zc->zc_name);
3189 nvlist_free(nvl);
3190 return (err);
3191 }
3192
3193 /*
3194 * inputs:
3195 * zc_name name of dataset to destroy
3196 * zc_objset_type type of objset
3197 * zc_defer_destroy mark for deferred destroy
3198 *
3199 * outputs: none
3200 */
3201 static int
3202 zfs_ioc_destroy(zfs_cmd_t *zc)
3203 {
3204 int err;
3205 if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) {
3206 err = zfs_unmount_snap(zc->zc_name, NULL);
3207 if (err)
3208 return (err);
3209 }
3210
3211 err = dmu_objset_destroy(zc->zc_name, zc->zc_defer_destroy);
3212 if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
3213 (void) zvol_remove_minor(zc->zc_name);
3214 return (err);
3215 }
3216
3217 /*
3218 * inputs:
3219 * zc_name name of dataset to rollback (to most recent snapshot)
3220 *
3221 * outputs: none
3222 */
3223 static int
3224 zfs_ioc_rollback(zfs_cmd_t *zc)
3225 {
3226 dsl_dataset_t *ds, *clone;
3227 int error;
3228 zfsvfs_t *zfsvfs;
3229 char *clone_name;
3230
3231 error = dsl_dataset_hold(zc->zc_name, FTAG, &ds);
3232 if (error)
3233 return (error);
3234
3235 /* must not be a snapshot */
3236 if (dsl_dataset_is_snapshot(ds)) {
3237 dsl_dataset_rele(ds, FTAG);
3238 return (EINVAL);
3239 }
3240
3241 /* must have a most recent snapshot */
3242 if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) {
3243 dsl_dataset_rele(ds, FTAG);
3244 return (EINVAL);
3245 }
3246
3247 /*
3248 * Create clone of most recent snapshot.
3249 */
3250 clone_name = kmem_asprintf("%s/%%rollback", zc->zc_name);
3251 error = dmu_objset_clone(clone_name, ds->ds_prev, DS_FLAG_INCONSISTENT);
3252 if (error)
3253 goto out;
3254
3255 error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone);
3256 if (error)
3257 goto out;
3258
3259 /*
3260 * Do clone swap.
3261 */
3262 if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
3263 error = zfs_suspend_fs(zfsvfs);
3264 if (error == 0) {
3265 int resume_err;
3266
3267 if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
3268 error = dsl_dataset_clone_swap(clone, ds,
3269 B_TRUE);
3270 dsl_dataset_disown(ds, FTAG);
3271 ds = NULL;
3272 } else {
3273 error = EBUSY;
3274 }
3275 resume_err = zfs_resume_fs(zfsvfs, zc->zc_name);
3276 error = error ? error : resume_err;
3277 }
3278 VFS_RELE(zfsvfs->z_vfs);
3279 } else {
3280 if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
3281 error = dsl_dataset_clone_swap(clone, ds, B_TRUE);
3282 dsl_dataset_disown(ds, FTAG);
3283 ds = NULL;
3284 } else {
3285 error = EBUSY;
3286 }
3287 }
3288
3289 /*
3290 * Destroy clone (which also closes it).
3291 */
3292 (void) dsl_dataset_destroy(clone, FTAG, B_FALSE);
3293
3294 out:
3295 strfree(clone_name);
3296 if (ds)
3297 dsl_dataset_rele(ds, FTAG);
3298 return (error);
3299 }
3300
3301 /*
3302 * inputs:
3303 * zc_name old name of dataset
3304 * zc_value new name of dataset
3305 * zc_cookie recursive flag (only valid for snapshots)
3306 *
3307 * outputs: none
3308 */
3309 static int
3310 zfs_ioc_rename(zfs_cmd_t *zc)
3311 {
3312 boolean_t recursive = zc->zc_cookie & 1;
3313
3314 zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
3315 if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3316 strchr(zc->zc_value, '%'))
3317 return (EINVAL);
3318
3319 /*
3320 * Unmount snapshot unless we're doing a recursive rename,
3321 * in which case the dataset code figures out which snapshots
3322 * to unmount.
3323 */
3324 if (!recursive && strchr(zc->zc_name, '@') != NULL &&
3325 zc->zc_objset_type == DMU_OST_ZFS) {
3326 int err = zfs_unmount_snap(zc->zc_name, NULL);
3327 if (err)
3328 return (err);
3329 }
3330 if (zc->zc_objset_type == DMU_OST_ZVOL)
3331 (void) zvol_remove_minor(zc->zc_name);
3332 return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive));
3333 }
3334
3335 static int
3336 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
3337 {
3338 const char *propname = nvpair_name(pair);
3339 boolean_t issnap = (strchr(dsname, '@') != NULL);
3340 zfs_prop_t prop = zfs_name_to_prop(propname);
3341 uint64_t intval;
3342 int err;
3343
3344 if (prop == ZPROP_INVAL) {
3345 if (zfs_prop_user(propname)) {
3346 if (err = zfs_secpolicy_write_perms(dsname,
3347 ZFS_DELEG_PERM_USERPROP, cr))
3348 return (err);
3349 return (0);
3350 }
3351
3352 if (!issnap && zfs_prop_userquota(propname)) {
3353 const char *perm = NULL;
3354 const char *uq_prefix =
3355 zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
3356 const char *gq_prefix =
3357 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
3358
3359 if (strncmp(propname, uq_prefix,
3360 strlen(uq_prefix)) == 0) {
3361 perm = ZFS_DELEG_PERM_USERQUOTA;
3362 } else if (strncmp(propname, gq_prefix,
3363 strlen(gq_prefix)) == 0) {
3364 perm = ZFS_DELEG_PERM_GROUPQUOTA;
3365 } else {
3366 /* USERUSED and GROUPUSED are read-only */
3367 return (EINVAL);
3368 }
3369
3370 if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
3371 return (err);
3372 return (0);
3373 }
3374
3375 return (EINVAL);
3376 }
3377
3378 if (issnap)
3379 return (EINVAL);
3380
3381 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3382 /*
3383 * dsl_prop_get_all_impl() returns properties in this
3384 * format.
3385 */
3386 nvlist_t *attrs;
3387 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
3388 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3389 &pair) == 0);
3390 }
3391
3392 /*
3393 * Check that this value is valid for this pool version
3394 */
3395 switch (prop) {
3396 case ZFS_PROP_COMPRESSION:
3397 /*
3398 * If the user specified gzip compression, make sure
3399 * the SPA supports it. We ignore any errors here since
3400 * we'll catch them later.
3401 */
3402 if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3403 nvpair_value_uint64(pair, &intval) == 0) {
3404 if (intval >= ZIO_COMPRESS_GZIP_1 &&
3405 intval <= ZIO_COMPRESS_GZIP_9 &&
3406 zfs_earlier_version(dsname,
3407 SPA_VERSION_GZIP_COMPRESSION)) {
3408 return (ENOTSUP);
3409 }
3410
3411 if (intval == ZIO_COMPRESS_ZLE &&
3412 zfs_earlier_version(dsname,
3413 SPA_VERSION_ZLE_COMPRESSION))
3414 return (ENOTSUP);
3415
3416 /*
3417 * If this is a bootable dataset then
3418 * verify that the compression algorithm
3419 * is supported for booting. We must return
3420 * something other than ENOTSUP since it
3421 * implies a downrev pool version.
3422 */
3423 if (zfs_is_bootfs(dsname) &&
3424 !BOOTFS_COMPRESS_VALID(intval)) {
3425 return (ERANGE);
3426 }
3427 }
3428 break;
3429
3430 case ZFS_PROP_COPIES:
3431 if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
3432 return (ENOTSUP);
3433 break;
3434
3435 case ZFS_PROP_DEDUP:
3436 if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
3437 return (ENOTSUP);
3438 break;
3439
3440 case ZFS_PROP_SHARESMB:
3441 if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
3442 return (ENOTSUP);
3443 break;
3444
3445 case ZFS_PROP_ACLINHERIT:
3446 if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3447 nvpair_value_uint64(pair, &intval) == 0) {
3448 if (intval == ZFS_ACL_PASSTHROUGH_X &&
3449 zfs_earlier_version(dsname,
3450 SPA_VERSION_PASSTHROUGH_X))
3451 return (ENOTSUP);
3452 }
3453 break;
3454 }
3455
3456 return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
3457 }
3458
3459 /*
3460 * Removes properties from the given props list that fail permission checks
3461 * needed to clear them and to restore them in case of a receive error. For each
3462 * property, make sure we have both set and inherit permissions.
3463 *
3464 * Returns the first error encountered if any permission checks fail. If the
3465 * caller provides a non-NULL errlist, it also gives the complete list of names
3466 * of all the properties that failed a permission check along with the
3467 * corresponding error numbers. The caller is responsible for freeing the
3468 * returned errlist.
3469 *
3470 * If every property checks out successfully, zero is returned and the list
3471 * pointed at by errlist is NULL.
3472 */
3473 static int
3474 zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
3475 {
3476 zfs_cmd_t *zc;
3477 nvpair_t *pair, *next_pair;
3478 nvlist_t *errors;
3479 int err, rv = 0;
3480
3481 if (props == NULL)
3482 return (0);
3483
3484 VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
3485
3486 zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
3487 (void) strcpy(zc->zc_name, dataset);
3488 pair = nvlist_next_nvpair(props, NULL);
3489 while (pair != NULL) {
3490 next_pair = nvlist_next_nvpair(props, pair);
3491
3492 (void) strcpy(zc->zc_value, nvpair_name(pair));
3493 if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
3494 (err = zfs_secpolicy_inherit(zc, CRED())) != 0) {
3495 VERIFY(nvlist_remove_nvpair(props, pair) == 0);
3496 VERIFY(nvlist_add_int32(errors,
3497 zc->zc_value, err) == 0);
3498 }
3499 pair = next_pair;
3500 }
3501 kmem_free(zc, sizeof (zfs_cmd_t));
3502
3503 if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
3504 nvlist_free(errors);
3505 errors = NULL;
3506 } else {
3507 VERIFY(nvpair_value_int32(pair, &rv) == 0);
3508 }
3509
3510 if (errlist == NULL)
3511 nvlist_free(errors);
3512 else
3513 *errlist = errors;
3514
3515 return (rv);
3516 }
3517
3518 static boolean_t
3519 propval_equals(nvpair_t *p1, nvpair_t *p2)
3520 {
3521 if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
3522 /* dsl_prop_get_all_impl() format */
3523 nvlist_t *attrs;
3524 VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
3525 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3526 &p1) == 0);
3527 }
3528
3529 if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
3530 nvlist_t *attrs;
3531 VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
3532 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3533 &p2) == 0);
3534 }
3535
3536 if (nvpair_type(p1) != nvpair_type(p2))
3537 return (B_FALSE);
3538
3539 if (nvpair_type(p1) == DATA_TYPE_STRING) {
3540 char *valstr1, *valstr2;
3541
3542 VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
3543 VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
3544 return (strcmp(valstr1, valstr2) == 0);
3545 } else {
3546 uint64_t intval1, intval2;
3547
3548 VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
3549 VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
3550 return (intval1 == intval2);
3551 }
3552 }
3553
3554 /*
3555 * Remove properties from props if they are not going to change (as determined
3556 * by comparison with origprops). Remove them from origprops as well, since we
3557 * do not need to clear or restore properties that won't change.
3558 */
3559 static void
3560 props_reduce(nvlist_t *props, nvlist_t *origprops)
3561 {
3562 nvpair_t *pair, *next_pair;
3563
3564 if (origprops == NULL)
3565 return; /* all props need to be received */
3566
3567 pair = nvlist_next_nvpair(props, NULL);
3568 while (pair != NULL) {
3569 const char *propname = nvpair_name(pair);
3570 nvpair_t *match;
3571
3572 next_pair = nvlist_next_nvpair(props, pair);
3573
3574 if ((nvlist_lookup_nvpair(origprops, propname,
3575 &match) != 0) || !propval_equals(pair, match))
3576 goto next; /* need to set received value */
3577
3578 /* don't clear the existing received value */
3579 (void) nvlist_remove_nvpair(origprops, match);
3580 /* don't bother receiving the property */
3581 (void) nvlist_remove_nvpair(props, pair);
3582 next:
3583 pair = next_pair;
3584 }
3585 }
3586
/*
 * DEBUG-only flag.  NOTE(review): judging by the name it is presumably
 * consulted by zfs_ioc_recv() to inject an error for testing -- confirm
 * against its use.
 */
#ifdef DEBUG
static boolean_t zfs_ioc_recv_inject_err;
#endif
3590
/*
 * Receive a send stream into the snapshot named by zc_value.  Any supplied
 * properties are applied as received properties before the stream is read,
 * and the previously received properties are put back if the receive fails.
 *
 * inputs:
 * zc_name		name of containing filesystem
 * zc_nvlist_src{_size}	nvlist of properties to apply
 * zc_value		name of snapshot to create
 * zc_string		name of clone origin (if DRR_FLAG_CLONE)
 * zc_cookie		file descriptor to recv from
 * zc_begin_record	the BEGIN record of the stream (not byteswapped)
 * zc_guid		force flag
 * zc_cleanup_fd	cleanup-on-exit file descriptor
 * zc_action_handle	handle for this guid/ds mapping (or zero on first call)
 * zc_top_ds		passed through to dmu_recv_begin()
 *
 * outputs:
 * zc_cookie		number of bytes read
 * zc_nvlist_dst{_size} error for each unapplied received property
 * zc_obj		zprop_errflags_t
 * zc_action_handle	handle for this guid/ds mapping
 */
static int
zfs_ioc_recv(zfs_cmd_t *zc)
{
	file_t *fp;
	objset_t *os;
	dmu_recv_cookie_t drc;
	boolean_t force = (boolean_t)zc->zc_guid;
	int fd;
	int error = 0;
	int props_error = 0;
	nvlist_t *errors;
	offset_t off;
	nvlist_t *props = NULL; /* sent properties */
	nvlist_t *origprops = NULL; /* existing properties */
	objset_t *origin = NULL;
	char *tosnap;
	char tofs[ZFS_MAXNAMELEN];
	boolean_t first_recvd_props = B_FALSE;

	/*
	 * zc_value must pass dataset_namecheck(), contain an '@' and must
	 * not contain a '%'.
	 */
	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
	    strchr(zc->zc_value, '@') == NULL ||
	    strchr(zc->zc_value, '%'))
		return (EINVAL);

	/*
	 * Split "fs@snap" in a private copy: tofs keeps the part before
	 * the '@' and tosnap points just past it.
	 */
	(void) strcpy(tofs, zc->zc_value);
	tosnap = strchr(tofs, '@');
	*tosnap++ = '\0';

	/* The property nvlist is optional; props stays NULL without it. */
	if (zc->zc_nvlist_src != NULL &&
	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
	    zc->zc_iflags, &props)) != 0)
		return (error);

	/* Hold the stream file; the matching releasef() is at "out". */
	fd = zc->zc_cookie;
	fp = getf(fd);
	if (fp == NULL) {
		nvlist_free(props);
		return (EBADF);
	}

	/* Accumulates per-property errors that are copied out below. */
	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	/*
	 * Only when properties were supplied and the target filesystem can
	 * already be held do we look at its current received properties.
	 */
	if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) {
		if ((spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS) &&
		    !dsl_prop_get_hasrecvd(os)) {
			first_recvd_props = B_TRUE;
		}

		/*
		 * If new received properties are supplied, they are to
		 * completely replace the existing received properties, so stash
		 * away the existing ones.
		 */
		if (dsl_prop_get_received(os, &origprops) == 0) {
			nvlist_t *errlist = NULL;
			/*
			 * Don't bother writing a property if its value won't
			 * change (and avoid the unnecessary security checks).
			 *
			 * The first receive after SPA_VERSION_RECVD_PROPS is a
			 * special case where we blow away all local properties
			 * regardless.
			 */
			if (!first_recvd_props)
				props_reduce(props, origprops);
			if (zfs_check_clearable(tofs, origprops,
			    &errlist) != 0)
				(void) nvlist_merge(errors, errlist, 0);
			nvlist_free(errlist);
		}

		dmu_objset_rele(os, FTAG);
	}

	/* A non-empty zc_string names the clone origin to hold. */
	if (zc->zc_string[0]) {
		error = dmu_objset_hold(zc->zc_string, FTAG, &origin);
		if (error)
			goto out;
	}

	/* The origin hold is only needed across dmu_recv_begin(). */
	error = dmu_recv_begin(tofs, tosnap, zc->zc_top_ds,
	    &zc->zc_begin_record, force, origin, &drc);
	if (origin)
		dmu_objset_rele(origin, FTAG);
	if (error)
		goto out;

	/*
	 * Set properties before we receive the stream so that they are applied
	 * to the new data. Note that we must call dmu_recv_stream() if
	 * dmu_recv_begin() succeeds.
	 */
	if (props) {
		nvlist_t *errlist;

		if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) {
			if (drc.drc_newfs) {
				if (spa_version(os->os_spa) >=
				    SPA_VERSION_RECVD_PROPS)
					first_recvd_props = B_TRUE;
			} else if (origprops != NULL) {
				if (clear_received_props(os, tofs, origprops,
				    first_recvd_props ? NULL : props) != 0)
					zc->zc_obj |= ZPROP_ERR_NOCLEAR;
			} else {
				/* Existing fs, but nothing was stashed. */
				zc->zc_obj |= ZPROP_ERR_NOCLEAR;
			}
			dsl_prop_set_hasrecvd(os);
		} else if (!drc.drc_newfs) {
			zc->zc_obj |= ZPROP_ERR_NOCLEAR;
		}

		/* Failures are reported per property through errlist. */
		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
		    props, &errlist);
		(void) nvlist_merge(errors, errlist, 0);
		nvlist_free(errlist);
	}

	if (fit_error_list(zc, &errors) != 0 || put_nvlist(zc, errors) != 0) {
		/*
		 * Caller made zc->zc_nvlist_dst less than the minimum expected
		 * size or supplied an invalid address.
		 */
		props_error = EINVAL;
	}

	/*
	 * off is handed to dmu_recv_stream() by reference; its distance
	 * from fp->f_offset afterwards is reported as the bytes read.
	 */
	off = fp->f_offset;
	error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd,
	    &zc->zc_action_handle);

	if (error == 0) {
		zfsvfs_t *zfsvfs = NULL;

		if (getzfsvfs(tofs, &zfsvfs) == 0) {
			/* online recv */
			int end_err;

			error = zfs_suspend_fs(zfsvfs);
			/*
			 * If the suspend fails, then the recv_end will
			 * likely also fail, and clean up after itself.
			 */
			end_err = dmu_recv_end(&drc);
			if (error == 0)
				error = zfs_resume_fs(zfsvfs, tofs);
			/* A suspend/resume error wins over end_err. */
			error = error ? error : end_err;
			VFS_RELE(zfsvfs->z_vfs);
		} else {
			error = dmu_recv_end(&drc);
		}
	}

	/* Report bytes consumed, then try to move the file offset along. */
	zc->zc_cookie = off - fp->f_offset;
	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
		fp->f_offset = off;

#ifdef DEBUG
	/* One-shot forced failure to exercise the restore path below. */
	if (zfs_ioc_recv_inject_err) {
		zfs_ioc_recv_inject_err = B_FALSE;
		error = 1;
	}
#endif
	/*
	 * On error, restore the original props.
	 */
	if (error && props) {
		if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
			if (clear_received_props(os, tofs, props, NULL) != 0) {
				/*
				 * We failed to clear the received properties.
				 * Since we may have left a $recvd value on the
				 * system, we can't clear the $hasrecvd flag.
				 */
				zc->zc_obj |= ZPROP_ERR_NORESTORE;
			} else if (first_recvd_props) {
				dsl_prop_unset_hasrecvd(os);
			}
			dmu_objset_rele(os, FTAG);
		} else if (!drc.drc_newfs) {
			/* We failed to clear the received properties. */
			zc->zc_obj |= ZPROP_ERR_NORESTORE;
		}

		if (origprops == NULL && !drc.drc_newfs) {
			/* We failed to stash the original properties. */
			zc->zc_obj |= ZPROP_ERR_NORESTORE;
		}

		/*
		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
		 * explicitly if we're restoring local properties cleared in the
		 * first new-style receive.
		 */
		if (origprops != NULL &&
		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
		    origprops, NULL) != 0) {
			/*
			 * We stashed the original properties but failed to
			 * restore them.
			 */
			zc->zc_obj |= ZPROP_ERR_NORESTORE;
		}
	}
out:
	/* Common cleanup for every path taken after getf() succeeded. */
	nvlist_free(props);
	nvlist_free(origprops);
	nvlist_free(errors);
	releasef(fd);

	/*
	 * A receive error takes precedence; otherwise report a failure to
	 * copy out the property error list.
	 */
	if (error == 0)
		error = props_error;

	return (error);
}
3825
3826 /*
3827 * inputs:
3828 * zc_name name of snapshot to send
3829 * zc_cookie file descriptor to send stream to
3830 * zc_obj fromorigin flag (mutually exclusive with zc_fromobj)
3831 * zc_sendobj objsetid of snapshot to send
3832 * zc_fromobj objsetid of incremental fromsnap (may be zero)
3833 * zc_guid if set, estimate size of stream only. zc_cookie is ignored.
3834 * output size in zc_objset_type.
3835 *
3836 * outputs: none
3837 */
3838 static int
3839 zfs_ioc_send(zfs_cmd_t *zc)
3840 {
3841 objset_t *fromsnap = NULL;
3842 objset_t *tosnap;
3843 int error;
3844 offset_t off;
3845 dsl_dataset_t *ds;
3846 dsl_dataset_t *dsfrom = NULL;
3847 spa_t *spa;
3848 dsl_pool_t *dp;
3849 boolean_t estimate = (zc->zc_guid != 0);
3850
3851 error = spa_open(zc->zc_name, &spa, FTAG);
3852 if (error)
3853 return (error);
3854
3855 dp = spa_get_dsl(spa);
3856 rw_enter(&dp->dp_config_rwlock, RW_READER);
3857 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
3858 rw_exit(&dp->dp_config_rwlock);
3859 if (error) {
3860 spa_close(spa, FTAG);
3861 return (error);
3862 }
3863
3864 error = dmu_objset_from_ds(ds, &tosnap);
3865 if (error) {
3866 dsl_dataset_rele(ds, FTAG);
3867 spa_close(spa, FTAG);
3868 return (error);
3869 }
3870
3871 if (zc->zc_fromobj != 0) {
3872 rw_enter(&dp->dp_config_rwlock, RW_READER);
3873 error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom);
3874 rw_exit(&dp->dp_config_rwlock);
3875 spa_close(spa, FTAG);
3876 if (error) {
3877 dsl_dataset_rele(ds, FTAG);
3878 return (error);
3879 }
3880 error = dmu_objset_from_ds(dsfrom, &fromsnap);
3881 if (error) {
3882 dsl_dataset_rele(dsfrom, FTAG);
3883 dsl_dataset_rele(ds, FTAG);
3884 return (error);
3885 }
3886 } else {
3887 spa_close(spa, FTAG);
3888 }
3889
3890 if (estimate) {
3891 error = dmu_send_estimate(tosnap, fromsnap, zc->zc_obj,
3892 &zc->zc_objset_type);
3893 } else {
3894 file_t *fp = getf(zc->zc_cookie);
3895 if (fp == NULL) {
3896 dsl_dataset_rele(ds, FTAG);
3897 if (dsfrom)
3898 dsl_dataset_rele(dsfrom, FTAG);
3899 return (EBADF);
3900 }
3901
3902 off = fp->f_offset;
3903 error = dmu_send(tosnap, fromsnap, zc->zc_obj,
3904 zc->zc_cookie, fp->f_vnode, &off);
3905
3906 if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
3907 fp->f_offset = off;
3908 releasef(zc->zc_cookie);
3909 }
3910 if (dsfrom)
3911 dsl_dataset_rele(dsfrom, FTAG);
3912 dsl_dataset_rele(ds, FTAG);
3913 return (error);
3914 }
3915
3916 /*
3917 * inputs:
3918 * zc_name name of snapshot on which to report progress
3919 * zc_cookie file descriptor of send stream
3920 *
3921 * outputs:
3922 * zc_cookie number of bytes written in send stream thus far
3923 */
3924 static int
3925 zfs_ioc_send_progress(zfs_cmd_t *zc)
3926 {
3927 dsl_dataset_t *ds;
3928 dmu_sendarg_t *dsp = NULL;
3929 int error;
3930
3931 if ((error = dsl_dataset_hold(zc->zc_name, FTAG, &ds)) != 0)
3932 return (error);
3933
3934 mutex_enter(&ds->ds_sendstream_lock);
3935
3936 /*
3937 * Iterate over all the send streams currently active on this dataset.
3938 * If there's one which matches the specified file descriptor _and_ the
3939 * stream was started by the current process, return the progress of
3940 * that stream.
3941 */
3942 for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
3943 dsp = list_next(&ds->ds_sendstreams, dsp)) {
3944 if (dsp->dsa_outfd == zc->zc_cookie &&
3945 dsp->dsa_proc == curproc)
3946 break;
3947 }
3948
3949 if (dsp != NULL)
3950 zc->zc_cookie = *(dsp->dsa_off);
3951 else
3952 error = ENOENT;
3953
3954 mutex_exit(&ds->ds_sendstream_lock);
3955 dsl_dataset_rele(ds, FTAG);
3956 return (error);
3957 }
3958
3959 static int
3960 zfs_ioc_inject_fault(zfs_cmd_t *zc)
3961 {
3962 int id, error;
3963
3964 error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
3965 &zc->zc_inject_record);
3966
3967 if (error == 0)
3968 zc->zc_guid = (uint64_t)id;
3969
3970 return (error);
3971 }
3972
3973 static int
3974 zfs_ioc_clear_fault(zfs_cmd_t *zc)
3975 {
3976 return (zio_clear_fault((int)zc->zc_guid));
3977 }
3978
3979 static int
3980 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
3981 {
3982 int id = (int)zc->zc_guid;
3983 int error;
3984
3985 error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
3986 &zc->zc_inject_record);
3987
3988 zc->zc_guid = id;
3989
3990 return (error);
3991 }
3992
3993 static int
3994 zfs_ioc_error_log(zfs_cmd_t *zc)
3995 {
3996 spa_t *spa;
3997 int error;
3998 size_t count = (size_t)zc->zc_nvlist_dst_size;
3999
4000 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
4001 return (error);
4002
4003 error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
4004 &count);
4005 if (error == 0)
4006 zc->zc_nvlist_dst_size = count;
4007 else
4008 zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
4009
4010 spa_close(spa, FTAG);
4011
4012 return (error);
4013 }
4014
4015 static int
4016 zfs_ioc_clear(zfs_cmd_t *zc)
4017 {
4018 spa_t *spa;
4019 vdev_t *vd;
4020 int error;
4021
4022 /*
4023 * On zpool clear we also fix up missing slogs
4024 */
4025 mutex_enter(&spa_namespace_lock);
4026 spa = spa_lookup(zc->zc_name);
4027 if (spa == NULL) {
4028 mutex_exit(&spa_namespace_lock);
4029 return (EIO);
4030 }
4031 if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
4032 /* we need to let spa_open/spa_load clear the chains */
4033 spa_set_log_state(spa, SPA_LOG_CLEAR);
4034 }
4035 spa->spa_last_open_failed = 0;
4036 mutex_exit(&spa_namespace_lock);
4037
4038 if (zc->zc_cookie & ZPOOL_NO_REWIND) {
4039 error = spa_open(zc->zc_name, &spa, FTAG);
4040 } else {
4041 nvlist_t *policy;
4042 nvlist_t *config = NULL;
4043
4044 if (zc->zc_nvlist_src == NULL)
4045 return (EINVAL);
4046
4047 if ((error = get_nvlist(zc->zc_nvlist_src,
4048 zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
4049 error = spa_open_rewind(zc->zc_name, &spa, FTAG,
4050 policy, &config);
4051 if (config != NULL) {
4052 int err;
4053
4054 if ((err = put_nvlist(zc, config)) != 0)
4055 error = err;
4056 nvlist_free(config);
4057 }
4058 nvlist_free(policy);
4059 }
4060 }
4061
4062 if (error)
4063 return (error);
4064
4065 spa_vdev_state_enter(spa, SCL_NONE);
4066
4067 if (zc->zc_guid == 0) {
4068 vd = NULL;
4069 } else {
4070 vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
4071 if (vd == NULL) {
4072 (void) spa_vdev_state_exit(spa, NULL, ENODEV);
4073 spa_close(spa, FTAG);
4074 return (ENODEV);
4075 }
4076 }
4077
4078 vdev_clear(spa, vd);
4079
4080 (void) spa_vdev_state_exit(spa, NULL, 0);
4081
4082 /*
4083 * Resume any suspended I/Os.
4084 */
4085 if (zio_resume(spa) != 0)
4086 error = EIO;
4087
4088 spa_close(spa, FTAG);
4089
4090 return (error);
4091 }
4092
4093 static int
4094 zfs_ioc_pool_reopen(zfs_cmd_t *zc)
4095 {
4096 spa_t *spa;
4097 int error;
4098
4099 error = spa_open(zc->zc_name, &spa, FTAG);
4100 if (error)
4101 return (error);
4102
4103 spa_vdev_state_enter(spa, SCL_NONE);
4104 vdev_reopen(spa->spa_root_vdev);
4105 (void) spa_vdev_state_exit(spa, NULL, 0);
4106 spa_close(spa, FTAG);
4107 return (0);
4108 }
4109 /*
4110 * inputs:
4111 * zc_name name of filesystem
4112 * zc_value name of origin snapshot
4113 *
4114 * outputs:
4115 * zc_string name of conflicting snapshot, if there is one
4116 */
4117 static int
4118 zfs_ioc_promote(zfs_cmd_t *zc)
4119 {
4120 char *cp;
4121
4122 /*
4123 * We don't need to unmount *all* the origin fs's snapshots, but
4124 * it's easier.
4125 */
4126 cp = strchr(zc->zc_value, '@');
4127 if (cp)
4128 *cp = '\0';
4129 (void) dmu_objset_find(zc->zc_value,
4130 zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS);
4131 return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
4132 }
4133
4134 /*
4135 * Retrieve a single {user|group}{used|quota}@... property.
4136 *
4137 * inputs:
4138 * zc_name name of filesystem
4139 * zc_objset_type zfs_userquota_prop_t
4140 * zc_value domain name (eg. "S-1-234-567-89")
4141 * zc_guid RID/UID/GID
4142 *
4143 * outputs:
4144 * zc_cookie property value
4145 */
4146 static int
4147 zfs_ioc_userspace_one(zfs_cmd_t *zc)
4148 {
4149 zfsvfs_t *zfsvfs;
4150 int error;
4151
4152 if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
4153 return (EINVAL);
4154
4155 error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4156 if (error)
4157 return (error);
4158
4159 error = zfs_userspace_one(zfsvfs,
4160 zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
4161 zfsvfs_rele(zfsvfs, FTAG);
4162
4163 return (error);
4164 }
4165
4166 /*
4167 * inputs:
4168 * zc_name name of filesystem
4169 * zc_cookie zap cursor
4170 * zc_objset_type zfs_userquota_prop_t
4171 * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
4172 *
4173 * outputs:
4174 * zc_nvlist_dst[_size] data buffer (array of zfs_useracct_t)
4175 * zc_cookie zap cursor
4176 */
4177 static int
4178 zfs_ioc_userspace_many(zfs_cmd_t *zc)
4179 {
4180 zfsvfs_t *zfsvfs;
4181 int bufsize = zc->zc_nvlist_dst_size;
4182
4183 if (bufsize <= 0)
4184 return (ENOMEM);
4185
4186 int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4187 if (error)
4188 return (error);
4189
4190 void *buf = kmem_alloc(bufsize, KM_SLEEP);
4191
4192 error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
4193 buf, &zc->zc_nvlist_dst_size);
4194
4195 if (error == 0) {
4196 error = xcopyout(buf,
4197 (void *)(uintptr_t)zc->zc_nvlist_dst,
4198 zc->zc_nvlist_dst_size);
4199 }
4200 kmem_free(buf, bufsize);
4201 zfsvfs_rele(zfsvfs, FTAG);
4202
4203 return (error);
4204 }
4205
4206 /*
4207 * inputs:
4208 * zc_name name of filesystem
4209 *
4210 * outputs:
4211 * none
4212 */
4213 static int
4214 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
4215 {
4216 objset_t *os;
4217 int error = 0;
4218 zfsvfs_t *zfsvfs;
4219
4220 if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
4221 if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
4222 /*
4223 * If userused is not enabled, it may be because the
4224 * objset needs to be closed & reopened (to grow the
4225 * objset_phys_t). Suspend/resume the fs will do that.
4226 */
4227 error = zfs_suspend_fs(zfsvfs);
4228 if (error == 0)
4229 error = zfs_resume_fs(zfsvfs, zc->zc_name);
4230 }
4231 if (error == 0)
4232 error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
4233 VFS_RELE(zfsvfs->z_vfs);
4234 } else {
4235 /* XXX kind of reading contents without owning */
4236 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4237 if (error)
4238 return (error);
4239
4240 error = dmu_objset_userspace_upgrade(os);
4241 dmu_objset_rele(os, FTAG);
4242 }
4243
4244 return (error);
4245 }
4246
/*
 * We don't want to have a hard dependency
 * against some special symbols in sharefs,
 * nfs, and smbsrv.  Determine them if needed when
 * the first file system is shared.
 * None of sharefs, nfs or smbsrv can be unloaded.
 */

/* Entry points looked up with ddi_modsym(); NULL until resolved. */
int (*znfsexport_fs)(void *arg);
int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
int (*zsmbexport_fs)(void *arg, boolean_t add_share);

/* Set to 1 by zfs_ioc_share() once the matching setup has completed. */
int zfs_nfsshare_inited;
int zfs_smbshare_inited;

/* Module handles returned by ddi_modopen(); NULL until opened. */
ddi_modhandle_t nfs_mod;
ddi_modhandle_t sharefs_mod;
ddi_modhandle_t smbsrv_mod;
/* Held while the handles and entry points above are being set up. */
kmutex_t zfs_share_lock;
4265
4266 static int
4267 zfs_init_sharefs()
4268 {
4269 int error;
4270
4271 ASSERT(MUTEX_HELD(&zfs_share_lock));
4272 /* Both NFS and SMB shares also require sharetab support. */
4273 if (sharefs_mod == NULL && ((sharefs_mod =
4274 ddi_modopen("fs/sharefs",
4275 KRTLD_MODE_FIRST, &error)) == NULL)) {
4276 return (ENOSYS);
4277 }
4278 if (zshare_fs == NULL && ((zshare_fs =
4279 (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
4280 ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
4281 return (ENOSYS);
4282 }
4283 return (0);
4284 }
4285
4286 static int
4287 zfs_ioc_share(zfs_cmd_t *zc)
4288 {
4289 int error;
4290 int opcode;
4291
4292 switch (zc->zc_share.z_sharetype) {
4293 case ZFS_SHARE_NFS:
4294 case ZFS_UNSHARE_NFS:
4295 if (zfs_nfsshare_inited == 0) {
4296 mutex_enter(&zfs_share_lock);
4297 if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
4298 KRTLD_MODE_FIRST, &error)) == NULL)) {
4299 mutex_exit(&zfs_share_lock);
4300 return (ENOSYS);
4301 }
4302 if (znfsexport_fs == NULL &&
4303 ((znfsexport_fs = (int (*)(void *))
4304 ddi_modsym(nfs_mod,
4305 "nfs_export", &error)) == NULL)) {
4306 mutex_exit(&zfs_share_lock);
4307 return (ENOSYS);
4308 }
4309 error = zfs_init_sharefs();
4310 if (error) {
4311 mutex_exit(&zfs_share_lock);
4312 return (ENOSYS);
4313 }
4314 zfs_nfsshare_inited = 1;
4315 mutex_exit(&zfs_share_lock);
4316 }
4317 break;
4318 case ZFS_SHARE_SMB:
4319 case ZFS_UNSHARE_SMB:
4320 if (zfs_smbshare_inited == 0) {
4321 mutex_enter(&zfs_share_lock);
4322 if (smbsrv_mod == NULL && ((smbsrv_mod =
4323 ddi_modopen("drv/smbsrv",
4324 KRTLD_MODE_FIRST, &error)) == NULL)) {
4325 mutex_exit(&zfs_share_lock);
4326 return (ENOSYS);
4327 }
4328 if (zsmbexport_fs == NULL && ((zsmbexport_fs =
4329 (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
4330 "smb_server_share", &error)) == NULL)) {
4331 mutex_exit(&zfs_share_lock);
4332 return (ENOSYS);
4333 }
4334 error = zfs_init_sharefs();
4335 if (error) {
4336 mutex_exit(&zfs_share_lock);
4337 return (ENOSYS);
4338 }
4339 zfs_smbshare_inited = 1;
4340 mutex_exit(&zfs_share_lock);
4341 }
4342 break;
4343 default:
4344 return (EINVAL);
4345 }
4346
4347 switch (zc->zc_share.z_sharetype) {
4348 case ZFS_SHARE_NFS:
4349 case ZFS_UNSHARE_NFS:
4350 if (error =
4351 znfsexport_fs((void *)
4352 (uintptr_t)zc->zc_share.z_exportdata))
4353 return (error);
4354 break;
4355 case ZFS_SHARE_SMB:
4356 case ZFS_UNSHARE_SMB:
4357 if (error = zsmbexport_fs((void *)
4358 (uintptr_t)zc->zc_share.z_exportdata,
4359 zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
4360 B_TRUE: B_FALSE)) {
4361 return (error);
4362 }
4363 break;
4364 }
4365
4366 opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
4367 zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
4368 SHAREFS_ADD : SHAREFS_REMOVE;
4369
4370 /*
4371 * Add or remove share from sharetab
4372 */
4373 error = zshare_fs(opcode,
4374 (void *)(uintptr_t)zc->zc_share.z_sharedata,
4375 zc->zc_share.z_sharemax);
4376
4377 return (error);
4378
4379 }
4380
/*
 * One-entry ACL that zfs_ioc_smb_acl() passes to VOP_CREATE() (through
 * vsa_aclentp) when it adds a share resource file.  The entry carries
 * ACE_ALL_PERMS with the ACE_EVERYONE flag.
 * NOTE(review): the trailing 0 is presumably the "allow" ACE type -- confirm
 * against the ace_t definition.
 */
ace_t full_access[] = {
	{(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
};
4384
4385 /*
4386 * inputs:
4387 * zc_name name of containing filesystem
4388 * zc_obj object # beyond which we want next in-use object #
4389 *
4390 * outputs:
4391 * zc_obj next in-use object #
4392 */
4393 static int
4394 zfs_ioc_next_obj(zfs_cmd_t *zc)
4395 {
4396 objset_t *os = NULL;
4397 int error;
4398
4399 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4400 if (error)
4401 return (error);
4402
4403 error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
4404 os->os_dsl_dataset->ds_phys->ds_prev_snap_txg);
4405
4406 dmu_objset_rele(os, FTAG);
4407 return (error);
4408 }
4409
4410 /*
4411 * inputs:
4412 * zc_name name of filesystem
4413 * zc_value prefix name for snapshot
4414 * zc_cleanup_fd cleanup-on-exit file descriptor for calling process
4415 *
4416 * outputs:
4417 */
4418 static int
4419 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
4420 {
4421 char *snap_name;
4422 int error;
4423
4424 snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
4425 (u_longlong_t)ddi_get_lbolt64());
4426
4427 if (strlen(snap_name) >= MAXNAMELEN) {
4428 strfree(snap_name);
4429 return (E2BIG);
4430 }
4431
4432 error = dmu_objset_snapshot(zc->zc_name, snap_name, snap_name,
4433 NULL, B_FALSE, B_TRUE, zc->zc_cleanup_fd);
4434 if (error != 0) {
4435 strfree(snap_name);
4436 return (error);
4437 }
4438
4439 (void) strcpy(zc->zc_value, snap_name);
4440 strfree(snap_name);
4441 return (0);
4442 }
4443
4444 /*
4445 * inputs:
4446 * zc_name name of "to" snapshot
4447 * zc_value name of "from" snapshot
4448 * zc_cookie file descriptor to write diff data on
4449 *
4450 * outputs:
4451 * dmu_diff_record_t's to the file descriptor
4452 */
4453 static int
4454 zfs_ioc_diff(zfs_cmd_t *zc)
4455 {
4456 objset_t *fromsnap;
4457 objset_t *tosnap;
4458 file_t *fp;
4459 offset_t off;
4460 int error;
4461
4462 error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap);
4463 if (error)
4464 return (error);
4465
4466 error = dmu_objset_hold(zc->zc_value, FTAG, &fromsnap);
4467 if (error) {
4468 dmu_objset_rele(tosnap, FTAG);
4469 return (error);
4470 }
4471
4472 fp = getf(zc->zc_cookie);
4473 if (fp == NULL) {
4474 dmu_objset_rele(fromsnap, FTAG);
4475 dmu_objset_rele(tosnap, FTAG);
4476 return (EBADF);
4477 }
4478
4479 off = fp->f_offset;
4480
4481 error = dmu_diff(tosnap, fromsnap, fp->f_vnode, &off);
4482
4483 if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
4484 fp->f_offset = off;
4485 releasef(zc->zc_cookie);
4486
4487 dmu_objset_rele(fromsnap, FTAG);
4488 dmu_objset_rele(tosnap, FTAG);
4489 return (error);
4490 }
4491
4492 /*
4493 * Remove all ACL files in shares dir
4494 */
4495 static int
4496 zfs_smb_acl_purge(znode_t *dzp)
4497 {
4498 zap_cursor_t zc;
4499 zap_attribute_t zap;
4500 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
4501 int error;
4502
4503 for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
4504 (error = zap_cursor_retrieve(&zc, &zap)) == 0;
4505 zap_cursor_advance(&zc)) {
4506 if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
4507 NULL, 0)) != 0)
4508 break;
4509 }
4510 zap_cursor_fini(&zc);
4511 return (error);
4512 }
4513
4514 static int
4515 zfs_ioc_smb_acl(zfs_cmd_t *zc)
4516 {
4517 vnode_t *vp;
4518 znode_t *dzp;
4519 vnode_t *resourcevp = NULL;
4520 znode_t *sharedir;
4521 zfsvfs_t *zfsvfs;
4522 nvlist_t *nvlist;
4523 char *src, *target;
4524 vattr_t vattr;
4525 vsecattr_t vsec;
4526 int error = 0;
4527
4528 if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
4529 NO_FOLLOW, NULL, &vp)) != 0)
4530 return (error);
4531
4532 /* Now make sure mntpnt and dataset are ZFS */
4533
4534 if (vp->v_vfsp->vfs_fstype != zfsfstype ||
4535 (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
4536 zc->zc_name) != 0)) {
4537 VN_RELE(vp);
4538 return (EINVAL);
4539 }
4540
4541 dzp = VTOZ(vp);
4542 zfsvfs = dzp->z_zfsvfs;
4543 ZFS_ENTER(zfsvfs);
4544
4545 /*
4546 * Create share dir if its missing.
4547 */
4548 mutex_enter(&zfsvfs->z_lock);
4549 if (zfsvfs->z_shares_dir == 0) {
4550 dmu_tx_t *tx;
4551
4552 tx = dmu_tx_create(zfsvfs->z_os);
4553 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
4554 ZFS_SHARES_DIR);
4555 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
4556 error = dmu_tx_assign(tx, TXG_WAIT);
4557 if (error) {
4558 dmu_tx_abort(tx);
4559 } else {
4560 error = zfs_create_share_dir(zfsvfs, tx);
4561 dmu_tx_commit(tx);
4562 }
4563 if (error) {
4564 mutex_exit(&zfsvfs->z_lock);
4565 VN_RELE(vp);
4566 ZFS_EXIT(zfsvfs);
4567 return (error);
4568 }
4569 }
4570 mutex_exit(&zfsvfs->z_lock);
4571
4572 ASSERT(zfsvfs->z_shares_dir);
4573 if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
4574 VN_RELE(vp);
4575 ZFS_EXIT(zfsvfs);
4576 return (error);
4577 }
4578
4579 switch (zc->zc_cookie) {
4580 case ZFS_SMB_ACL_ADD:
4581 vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
4582 vattr.va_type = VREG;
4583 vattr.va_mode = S_IFREG|0777;
4584 vattr.va_uid = 0;
4585 vattr.va_gid = 0;
4586
4587 vsec.vsa_mask = VSA_ACE;
4588 vsec.vsa_aclentp = &full_access;
4589 vsec.vsa_aclentsz = sizeof (full_access);
4590 vsec.vsa_aclcnt = 1;
4591
4592 error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
4593 &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
4594 if (resourcevp)
4595 VN_RELE(resourcevp);
4596 break;
4597
4598 case ZFS_SMB_ACL_REMOVE:
4599 error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
4600 NULL, 0);
4601 break;
4602
4603 case ZFS_SMB_ACL_RENAME:
4604 if ((error = get_nvlist(zc->zc_nvlist_src,
4605 zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
4606 VN_RELE(vp);
4607 ZFS_EXIT(zfsvfs);
4608 return (error);
4609 }
4610 if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
4611 nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
4612 &target)) {
4613 VN_RELE(vp);
4614 VN_RELE(ZTOV(sharedir));
4615 ZFS_EXIT(zfsvfs);
4616 nvlist_free(nvlist);
4617 return (error);
4618 }
4619 error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
4620 kcred, NULL, 0);
4621 nvlist_free(nvlist);
4622 break;
4623
4624 case ZFS_SMB_ACL_PURGE:
4625 error = zfs_smb_acl_purge(sharedir);
4626 break;
4627
4628 default:
4629 error = EINVAL;
4630 break;
4631 }
4632
4633 VN_RELE(vp);
4634 VN_RELE(ZTOV(sharedir));
4635
4636 ZFS_EXIT(zfsvfs);
4637
4638 return (error);
4639 }
4640
4641 /*
4642 * inputs:
4643 * zc_name name of filesystem
4644 * zc_value short name of snap
4645 * zc_string user-supplied tag for this hold
4646 * zc_cookie recursive flag
4647 * zc_temphold set if hold is temporary
4648 * zc_cleanup_fd cleanup-on-exit file descriptor for calling process
4649 * zc_sendobj if non-zero, the objid for zc_name@zc_value
4650 * zc_createtxg if zc_sendobj is non-zero, snap must have zc_createtxg
4651 *
4652 * outputs: none
4653 */
4654 static int
4655 zfs_ioc_hold(zfs_cmd_t *zc)
4656 {
4657 boolean_t recursive = zc->zc_cookie;
4658 spa_t *spa;
4659 dsl_pool_t *dp;
4660 dsl_dataset_t *ds;
4661 int error;
4662 minor_t minor = 0;
4663
4664 if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
4665 return (EINVAL);
4666
4667 if (zc->zc_sendobj == 0) {
4668 return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value,
4669 zc->zc_string, recursive, zc->zc_temphold,
4670 zc->zc_cleanup_fd));
4671 }
4672
4673 if (recursive)
4674 return (EINVAL);
4675
4676 error = spa_open(zc->zc_name, &spa, FTAG);
4677 if (error)
4678 return (error);
4679
4680 dp = spa_get_dsl(spa);
4681 rw_enter(&dp->dp_config_rwlock, RW_READER);
4682 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
4683 rw_exit(&dp->dp_config_rwlock);
4684 spa_close(spa, FTAG);
4685 if (error)
4686 return (error);
4687
4688 /*
4689 * Until we have a hold on this snapshot, it's possible that
4690 * zc_sendobj could've been destroyed and reused as part
4691 * of a later txg. Make sure we're looking at the right object.
4692 */
4693 if (zc->zc_createtxg != ds->ds_phys->ds_creation_txg) {
4694 dsl_dataset_rele(ds, FTAG);
4695 return (ENOENT);
4696 }
4697
4698 if (zc->zc_cleanup_fd != -1 && zc->zc_temphold) {
4699 error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
4700 if (error) {
4701 dsl_dataset_rele(ds, FTAG);
4702 return (error);
4703 }
4704 }
4705
4706 error = dsl_dataset_user_hold_for_send(ds, zc->zc_string,
4707 zc->zc_temphold);
4708 if (minor != 0) {
4709 if (error == 0) {
4710 dsl_register_onexit_hold_cleanup(ds, zc->zc_string,
4711 minor);
4712 }
4713 zfs_onexit_fd_rele(zc->zc_cleanup_fd);
4714 }
4715 dsl_dataset_rele(ds, FTAG);
4716
4717 return (error);
4718 }
4719
4720 /*
4721 * inputs:
4722 * zc_name name of dataset from which we're releasing a user hold
4723 * zc_value short name of snap
4724 * zc_string user-supplied tag for this hold
4725 * zc_cookie recursive flag
4726 *
4727 * outputs: none
4728 */
4729 static int
4730 zfs_ioc_release(zfs_cmd_t *zc)
4731 {
4732 boolean_t recursive = zc->zc_cookie;
4733
4734 if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
4735 return (EINVAL);
4736
4737 return (dsl_dataset_user_release(zc->zc_name, zc->zc_value,
4738 zc->zc_string, recursive));
4739 }
4740
4741 /*
4742 * inputs:
4743 * zc_name name of filesystem
4744 *
4745 * outputs:
4746 * zc_nvlist_src{_size} nvlist of snapshot holds
4747 */
4748 static int
4749 zfs_ioc_get_holds(zfs_cmd_t *zc)
4750 {
4751 nvlist_t *nvp;
4752 int error;
4753
4754 if ((error = dsl_dataset_get_holds(zc->zc_name, &nvp)) == 0) {
4755 error = put_nvlist(zc, nvp);
4756 nvlist_free(nvp);
4757 }
4758
4759 return (error);
4760 }
4761
4762 /*
4763 * inputs:
4764 * zc_name name of new filesystem or snapshot
4765 * zc_value full name of old snapshot
4766 *
4767 * outputs:
4768 * zc_cookie space in bytes
4769 * zc_objset_type compressed space in bytes
4770 * zc_perm_action uncompressed space in bytes
4771 */
4772 static int
4773 zfs_ioc_space_written(zfs_cmd_t *zc)
4774 {
4775 int error;
4776 dsl_dataset_t *new, *old;
4777
4778 error = dsl_dataset_hold(zc->zc_name, FTAG, &new);
4779 if (error != 0)
4780 return (error);
4781 error = dsl_dataset_hold(zc->zc_value, FTAG, &old);
4782 if (error != 0) {
4783 dsl_dataset_rele(new, FTAG);
4784 return (error);
4785 }
4786
4787 error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
4788 &zc->zc_objset_type, &zc->zc_perm_action);
4789 dsl_dataset_rele(old, FTAG);
4790 dsl_dataset_rele(new, FTAG);
4791 return (error);
4792 }
4793
4794 /*
4795 * inputs:
4796 * zc_name full name of last snapshot
4797 * zc_value full name of first snapshot
4798 *
4799 * outputs:
4800 * zc_cookie space in bytes
4801 * zc_objset_type compressed space in bytes
4802 * zc_perm_action uncompressed space in bytes
4803 */
4804 static int
4805 zfs_ioc_space_snaps(zfs_cmd_t *zc)
4806 {
4807 int error;
4808 dsl_dataset_t *new, *old;
4809
4810 error = dsl_dataset_hold(zc->zc_name, FTAG, &new);
4811 if (error != 0)
4812 return (error);
4813 error = dsl_dataset_hold(zc->zc_value, FTAG, &old);
4814 if (error != 0) {
4815 dsl_dataset_rele(new, FTAG);
4816 return (error);
4817 }
4818
4819 error = dsl_dataset_space_wouldfree(old, new, &zc->zc_cookie,
4820 &zc->zc_objset_type, &zc->zc_perm_action);
4821 dsl_dataset_rele(old, FTAG);
4822 dsl_dataset_rele(new, FTAG);
4823 return (error);
4824 }
4825
/*
 * Dispatch table for the /dev/zfs ioctls.
 *
 * zfsdev_ioctl() indexes this array with (cmd - ZFS_IOC), so the entries
 * must stay in ioctl-number order; reordering or inserting in the middle
 * changes which handler a given ioctl number reaches.
 *
 * Each entry lists, in order: the ioctl handler, the security policy
 * callback, the kind of name expected in zc_name (POOL_NAME, DATASET_NAME
 * or NO_NAME), whether zfsdev_ioctl() logs the command to the pool history
 * after it succeeds, and the pool state checks handed to
 * pool_status_check().  (Mapping of columns to the zvec_* members is
 * inferred from how zfsdev_ioctl() uses them -- confirm against the
 * zfs_ioc_vec_t typedef.)
 *
 * Pool create, destroy, and export are not logged to the history by
 * zfsdev_ioctl() (their history flag is B_FALSE); rather,
 * zfs_ioc_pool_create and zfs_ioc_pool_export do the logging of those
 * commands themselves.
 */
static zfs_ioc_vec_t zfs_ioc_vec[] = {
	{ zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    POOL_CHECK_NONE },
	{ zfs_ioc_pool_destroy,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    POOL_CHECK_NONE },
	{ zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    POOL_CHECK_NONE },
	{ zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    POOL_CHECK_NONE },
	{ zfs_ioc_pool_configs,	zfs_secpolicy_none, NO_NAME, B_FALSE,
	    POOL_CHECK_NONE },
	{ zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE,
	    POOL_CHECK_NONE },
	{ zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE,
	    POOL_CHECK_NONE },
	{ zfs_ioc_pool_scan, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE,
	    POOL_CHECK_READONLY },
	{ zfs_ioc_pool_upgrade,	zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    POOL_CHECK_NONE },
	{ zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_vdev_set_state, zfs_secpolicy_config,	POOL_NAME, B_TRUE,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_vdev_setpath,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_vdev_setfru,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_objset_stats,	zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    POOL_CHECK_SUSPENDED },
	{ zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    POOL_CHECK_NONE },
	{ zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    POOL_CHECK_SUSPENDED },
	{ zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    POOL_CHECK_SUSPENDED },
	{ zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_rename, zfs_secpolicy_rename,	DATASET_NAME, B_TRUE,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_FALSE,
	    POOL_CHECK_NONE },
	{ zfs_ioc_inject_fault,	zfs_secpolicy_inject, NO_NAME, B_FALSE,
	    POOL_CHECK_NONE },
	{ zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE,
	    POOL_CHECK_NONE },
	{ zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE,
	    POOL_CHECK_NONE },
	{ zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE,
	    POOL_CHECK_NONE },
	{ zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    POOL_CHECK_NONE },
	{ zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_dsobj_to_dsname, zfs_secpolicy_diff, POOL_NAME, B_FALSE,
	    POOL_CHECK_NONE },
	{ zfs_ioc_obj_to_path, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
	    POOL_CHECK_SUSPENDED },
	{ zfs_ioc_pool_set_props, zfs_secpolicy_config,	POOL_NAME, B_TRUE,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE,
	    POOL_CHECK_NONE },
	{ zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    POOL_CHECK_NONE },
	{ zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE,
	    POOL_CHECK_NONE },
	{ zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE,
	    POOL_CHECK_NONE },
	{ zfs_ioc_userspace_one, zfs_secpolicy_userspace_one, DATASET_NAME,
	    B_FALSE, POOL_CHECK_NONE },
	{ zfs_ioc_userspace_many, zfs_secpolicy_userspace_many, DATASET_NAME,
	    B_FALSE, POOL_CHECK_NONE },
	{ zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
	    DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    POOL_CHECK_SUSPENDED },
	{ zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    POOL_CHECK_NONE },
	{ zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_next_obj, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    POOL_CHECK_NONE },
	{ zfs_ioc_diff, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
	    POOL_CHECK_NONE },
	{ zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, DATASET_NAME,
	    B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
	    POOL_CHECK_SUSPENDED },
	{ zfs_ioc_space_written, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    POOL_CHECK_SUSPENDED },
	{ zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    POOL_CHECK_SUSPENDED },
	{ zfs_ioc_destroy_snaps_nvl, zfs_secpolicy_destroy_recursive,
	    DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_pool_reguid, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
	{ zfs_ioc_pool_reopen, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    POOL_CHECK_SUSPENDED },
	{ zfs_ioc_send_progress, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    POOL_CHECK_NONE }
};
4959
4960 int
4961 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
4962 zfs_ioc_poolcheck_t check)
4963 {
4964 spa_t *spa;
4965 int error;
4966
4967 ASSERT(type == POOL_NAME || type == DATASET_NAME);
4968
4969 if (check & POOL_CHECK_NONE)
4970 return (0);
4971
4972 error = spa_open(name, &spa, FTAG);
4973 if (error == 0) {
4974 if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
4975 error = EAGAIN;
4976 else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
4977 error = EROFS;
4978 spa_close(spa, FTAG);
4979 }
4980 return (error);
4981 }
4982
4983 /*
4984 * Find a free minor number.
4985 */
4986 minor_t
4987 zfsdev_minor_alloc(void)
4988 {
4989 static minor_t last_minor;
4990 minor_t m;
4991
4992 ASSERT(MUTEX_HELD(&zfsdev_state_lock));
4993
4994 for (m = last_minor + 1; m != last_minor; m++) {
4995 if (m > ZFSDEV_MAX_MINOR)
4996 m = 1;
4997 if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
4998 last_minor = m;
4999 return (m);
5000 }
5001 }
5002
5003 return (0);
5004 }
5005
5006 static int
5007 zfs_ctldev_init(dev_t *devp)
5008 {
5009 minor_t minor;
5010 zfs_soft_state_t *zs;
5011
5012 ASSERT(MUTEX_HELD(&zfsdev_state_lock));
5013 ASSERT(getminor(*devp) == 0);
5014
5015 minor = zfsdev_minor_alloc();
5016 if (minor == 0)
5017 return (ENXIO);
5018
5019 if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
5020 return (EAGAIN);
5021
5022 *devp = makedevice(getemajor(*devp), minor);
5023
5024 zs = ddi_get_soft_state(zfsdev_state, minor);
5025 zs->zss_type = ZSST_CTLDEV;
5026 zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
5027
5028 return (0);
5029 }
5030
/*
 * Tear down a cloned control-device minor: hand its onexit state 'zo' to
 * zfs_onexit_destroy(), then free the soft state slot for 'minor'.
 * Caller must hold zfsdev_state_lock.
 */
static void
zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
{
	ASSERT(MUTEX_HELD(&zfsdev_state_lock));

	zfs_onexit_destroy(zo);
	ddi_soft_state_free(zfsdev_state, minor);
}
5039
5040 void *
5041 zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
5042 {
5043 zfs_soft_state_t *zp;
5044
5045 zp = ddi_get_soft_state(zfsdev_state, minor);
5046 if (zp == NULL || zp->zss_type != which)
5047 return (NULL);
5048
5049 return (zp->zss_data);
5050 }
5051
5052 static int
5053 zfsdev_open(dev_t *devp, int flag, int otyp, cred_t *cr)
5054 {
5055 int error = 0;
5056
5057 if (getminor(*devp) != 0)
5058 return (zvol_open(devp, flag, otyp, cr));
5059
5060 /* This is the control device. Allocate a new minor if requested. */
5061 if (flag & FEXCL) {
5062 mutex_enter(&zfsdev_state_lock);
5063 error = zfs_ctldev_init(devp);
5064 mutex_exit(&zfsdev_state_lock);
5065 }
5066
5067 return (error);
5068 }
5069
5070 static int
5071 zfsdev_close(dev_t dev, int flag, int otyp, cred_t *cr)
5072 {
5073 zfs_onexit_t *zo;
5074 minor_t minor = getminor(dev);
5075
5076 if (minor == 0)
5077 return (0);
5078
5079 mutex_enter(&zfsdev_state_lock);
5080 zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
5081 if (zo == NULL) {
5082 mutex_exit(&zfsdev_state_lock);
5083 return (zvol_close(dev, flag, otyp, cr));
5084 }
5085 zfs_ctldev_destroy(zo, minor);
5086 mutex_exit(&zfsdev_state_lock);
5087
5088 return (0);
5089 }
5090
5091 static int
5092 zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
5093 {
5094 zfs_cmd_t *zc;
5095 uint_t vec;
5096 int error, rc;
5097 minor_t minor = getminor(dev);
5098
5099 if (minor != 0 &&
5100 zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL)
5101 return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
5102
5103 vec = cmd - ZFS_IOC;
5104 ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
5105
5106 if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
5107 return (EINVAL);
5108
5109 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
5110
5111 error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
5112 if (error != 0)
5113 error = EFAULT;
5114
5115 if ((error == 0) && !(flag & FKIOCTL))
5116 error = zfs_ioc_vec[vec].zvec_secpolicy(zc, cr);
5117
5118 /*
5119 * Ensure that all pool/dataset names are valid before we pass down to
5120 * the lower layers.
5121 */
5122 if (error == 0) {
5123 zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
5124 zc->zc_iflags = flag & FKIOCTL;
5125 switch (zfs_ioc_vec[vec].zvec_namecheck) {
5126 case POOL_NAME:
5127 if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
5128 error = EINVAL;
5129 error = pool_status_check(zc->zc_name,
5130 zfs_ioc_vec[vec].zvec_namecheck,
5131 zfs_ioc_vec[vec].zvec_pool_check);
5132 break;
5133
5134 case DATASET_NAME:
5135 if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
5136 error = EINVAL;
5137 error = pool_status_check(zc->zc_name,
5138 zfs_ioc_vec[vec].zvec_namecheck,
5139 zfs_ioc_vec[vec].zvec_pool_check);
5140 break;
5141
5142 case NO_NAME:
5143 break;
5144 }
5145 }
5146
5147 if (error == 0)
5148 error = zfs_ioc_vec[vec].zvec_func(zc);
5149
5150 rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
5151 if (error == 0) {
5152 if (rc != 0)
5153 error = EFAULT;
5154 if (zfs_ioc_vec[vec].zvec_his_log)
5155 zfs_log_history(zc);
5156 }
5157
5158 kmem_free(zc, sizeof (zfs_cmd_t));
5159 return (error);
5160 }
5161
5162 static int
5163 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5164 {
5165 if (cmd != DDI_ATTACH)
5166 return (DDI_FAILURE);
5167
5168 if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
5169 DDI_PSEUDO, 0) == DDI_FAILURE)
5170 return (DDI_FAILURE);
5171
5172 zfs_dip = dip;
5173
5174 ddi_report_dev(dip);
5175
5176 return (DDI_SUCCESS);
5177 }
5178
5179 static int
5180 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5181 {
5182 if (spa_busy() || zfs_busy() || zvol_busy())
5183 return (DDI_FAILURE);
5184
5185 if (cmd != DDI_DETACH)
5186 return (DDI_FAILURE);
5187
5188 zfs_dip = NULL;
5189
5190 ddi_prop_remove_all(dip);
5191 ddi_remove_minor_node(dip, NULL);
5192
5193 return (DDI_SUCCESS);
5194 }
5195
5196 /*ARGSUSED*/
5197 static int
5198 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
5199 {
5200 switch (infocmd) {
5201 case DDI_INFO_DEVT2DEVINFO:
5202 *result = zfs_dip;
5203 return (DDI_SUCCESS);
5204
5205 case DDI_INFO_DEVT2INSTANCE:
5206 *result = (void *)0;
5207 return (DDI_SUCCESS);
5208 }
5209
5210 return (DDI_FAILURE);
5211 }
5212
/*
 * OK, so this is a little weird.
 *
 * /dev/zfs is the control node, i.e. minor 0.
 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
 *
 * /dev/zfs has basically nothing to do except serve up ioctls,
 * so most of the standard driver entry points are in zvol.c.
 *
 * Note that a minor > 0 is not necessarily a zvol: an exclusive (FEXCL)
 * open of /dev/zfs is given its own cloned minor by zfsdev_open().  The
 * zfsdev_open/close/ioctl wrappers below tell the two apart by the soft
 * state type of the minor and pass everything else on to the zvol_*
 * routines.
 */
static struct cb_ops zfs_cb_ops = {
	zfsdev_open,	/* open */
	zfsdev_close,	/* close */
	zvol_strategy,	/* strategy */
	nodev,		/* print */
	zvol_dump,	/* dump */
	zvol_read,	/* read */
	zvol_write,	/* write */
	zfsdev_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* streamtab */
	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
	CB_REV,		/* version */
	nodev,		/* async read */
	nodev,		/* async write */
};
5242
/*
 * Device operations for the zfs driver.  Autoconfiguration is handled by
 * zfs_info/zfs_attach/zfs_detach; the character/block entry points live in
 * zfs_cb_ops.  The initializer is positional, so the order of the members
 * below must not be changed.
 */
static struct dev_ops zfs_dev_ops = {
	DEVO_REV,	/* version */
	0,		/* refcnt */
	zfs_info,	/* info */
	nulldev,	/* identify */
	nulldev,	/* probe */
	zfs_attach,	/* attach */
	zfs_detach,	/* detach */
	nodev,		/* reset */
	&zfs_cb_ops,	/* driver operations */
	NULL,		/* no bus operations */
	NULL,		/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};
5257
/*
 * Driver linkage: ties the zfs_dev_ops entry points to the module under
 * the description string "ZFS storage pool".
 */
static struct modldrv zfs_modldrv = {
	&mod_driverops,
	"ZFS storage pool",
	&zfs_dev_ops
};
5263
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info().
 * It carries two linkage structures: zfs_modlfs (declared outside this
 * part of the file) and the zfs_modldrv driver linkage, terminated by NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&zfs_modlfs,
	(void *)&zfs_modldrv,
	NULL
};
5270
5271
5272 uint_t zfs_fsyncer_key;
5273 extern uint_t rrw_tsd_key;
5274
5275 int
5276 _init(void)
5277 {
5278 int error;
5279
5280 spa_init(FREAD | FWRITE);
5281 zfs_init();
5282 zvol_init();
5283
5284 if ((error = mod_install(&modlinkage)) != 0) {
5285 zvol_fini();
5286 zfs_fini();
5287 spa_fini();
5288 return (error);
5289 }
5290
5291 tsd_create(&zfs_fsyncer_key, NULL);
5292 tsd_create(&rrw_tsd_key, NULL);
5293
5294 error = ldi_ident_from_mod(&modlinkage, &zfs_li);
5295 ASSERT(error == 0);
5296 mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
5297
5298 return (0);
5299 }
5300
5301 int
5302 _fini(void)
5303 {
5304 int error;
5305
5306 if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
5307 return (EBUSY);
5308
5309 if ((error = mod_remove(&modlinkage)) != 0)
5310 return (error);
5311
5312 zvol_fini();
5313 zfs_fini();
5314 spa_fini();
5315 if (zfs_nfsshare_inited)
5316 (void) ddi_modclose(nfs_mod);
5317 if (zfs_smbshare_inited)
5318 (void) ddi_modclose(smbsrv_mod);
5319 if (zfs_nfsshare_inited || zfs_smbshare_inited)
5320 (void) ddi_modclose(sharefs_mod);
5321
5322 tsd_destroy(&zfs_fsyncer_key);
5323 ldi_ident_release(zfs_li);
5324 zfs_li = NULL;
5325 mutex_destroy(&zfs_share_lock);
5326
5327 return (error);
5328 }
5329
/*
 * Module information entry point: returns whatever mod_info() reports for
 * this module's linkage.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}