1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved.
28 * Portions Copyright 2011 Martin Matuska
29 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
30 * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
31 * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
32 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
33 * Copyright (c) 2013 Steven Hartland. All rights reserved.
34 * Copyright (c) 2014 Integros [integros.com]
35 * Copyright 2018 Nexenta Systems, Inc.
36 * Copyright 2016 Toomas Soome <tsoome@me.com>
37 * Copyright 2017 RackTop Systems.
38 * Copyright (c) 2017 Datto Inc.
39 */
40
41 /*
42 * ZFS ioctls.
43 *
44 * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
45 * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
46 *
47 * There are two ways that we handle ioctls: the legacy way where almost
48 * all of the logic is in the ioctl callback, and the new way where most
49 * of the marshalling is handled in the common entry point, zfsdev_ioctl().
50 *
51 * Non-legacy ioctls should be registered by calling
52 * zfs_ioctl_register() from zfs_ioctl_init(). The ioctl is invoked
53 * from userland by lzc_ioctl().
54 *
55 * The registration arguments are as follows:
56 *
57 * const char *name
58 * The name of the ioctl. This is used for history logging. If the
59 * ioctl returns successfully (the callback returns 0), and allow_log
60 * is true, then a history log entry will be recorded with the input &
61 * output nvlists. The log entry can be printed with "zpool history -i".
62 *
63 * zfs_ioc_t ioc
64 * The ioctl request number, which userland will pass to ioctl(2).
65 * The ioctl numbers can change from release to release, because
66 * the caller (libzfs) must be matched to the kernel.
67 *
68 * zfs_secpolicy_func_t *secpolicy
69 * This function will be called before the zfs_ioc_func_t, to
70 * determine if this operation is permitted. It should return EPERM
71 * on failure, and 0 on success. Checks include determining if the
72 * dataset is visible in this zone, and if the user has either all
73 * zfs privileges in the zone (SYS_MOUNT), or has been granted permission
74 * to do this operation on this dataset with "zfs allow".
75 *
76 * zfs_ioc_namecheck_t namecheck
77 * This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
78 * name, a dataset name, or nothing. If the name is not well-formed,
79 * the ioctl will fail and the callback will not be called.
80 * Therefore, the callback can assume that the name is well-formed
81 * (e.g. is null-terminated, doesn't have more than one '@' character,
82 * doesn't have invalid characters).
83 *
84 * zfs_ioc_poolcheck_t pool_check
85 * This specifies requirements on the pool state. If the pool does
86 * not meet them (is suspended or is readonly), the ioctl will fail
87 * and the callback will not be called. If any checks are specified
88 * (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
89 * Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
90 * POOL_CHECK_READONLY).
91 *
92 * boolean_t smush_outnvlist
93 * If smush_outnvlist is true, then the output is presumed to be a
94 * list of errors, and it will be "smushed" down to fit into the
95 * caller's buffer, by removing some entries and replacing them with a
96 * single "N_MORE_ERRORS" entry indicating how many were removed. See
97 * nvlist_smush() for details. If smush_outnvlist is false, and the
98 * outnvlist does not fit into the userland-provided buffer, then the
99 * ioctl will fail with ENOMEM.
100 *
101 * zfs_ioc_func_t *func
102 * The callback function that will perform the operation.
103 *
104 * The callback should return 0 on success, or an error number on
105 * failure. If the function fails, the userland ioctl will return -1,
106 * and errno will be set to the callback's return value. The callback
107 * will be called with the following arguments:
108 *
109 * const char *name
110 * The name of the pool or dataset to operate on, from
111 * zfs_cmd_t:zc_name. The 'namecheck' argument specifies the
112 * expected type (pool, dataset, or none).
113 *
114 * nvlist_t *innvl
115 * The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src. Or
116 * NULL if no input nvlist was provided. Changes to this nvlist are
117 * ignored. If the input nvlist could not be deserialized, the
118 * ioctl will fail and the callback will not be called.
119 *
120 * nvlist_t *outnvl
121 * The output nvlist, initially empty. The callback can fill it in,
122 * and it will be returned to userland by serializing it into
123 * zfs_cmd_t:zc_nvlist_dst. If it is non-empty, and serialization
124 * fails (e.g. because the caller didn't supply a large enough
125 * buffer), then the overall ioctl will fail. See the
 *   'smush_outnvlist' argument above for additional behaviors.
127 *
128 * There are two typical uses of the output nvlist:
129 * - To return state, e.g. property values. In this case,
130 * smush_outnvlist should be false. If the buffer was not large
131 * enough, the caller will reallocate a larger buffer and try
132 * the ioctl again.
133 *
134 * - To return multiple errors from an ioctl which makes on-disk
135 * changes. In this case, smush_outnvlist should be true.
 *      Ioctls which make on-disk modifications should generally not
 *      use the outnvl if they succeed, because the caller cannot
 *      distinguish between the operation failing and deserialization
 *      failing.
140 */
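/*
 * Illustrative sketch only (not one of this file's actual registrations):
 * a hypothetical non-legacy ioctl written against the contract described
 * above.  The names "example", zfs_ioc_example and ZFS_IOC_EXAMPLE, and
 * the exact argument order of zfs_ioctl_register(), are assumptions made
 * for illustration; check the registrations in zfs_ioctl_init() before
 * relying on them.
 *
 *	static int
 *	zfs_ioc_example(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
 *	{
 *		uint64_t val;
 *
 *		if (nvlist_lookup_uint64(innvl, "value", &val) != 0)
 *			return (SET_ERROR(EINVAL));
 *		fnvlist_add_uint64(outnvl, "result", val + 1);
 *		return (0);
 *	}
 *
 *	zfs_ioctl_register("example", ZFS_IOC_EXAMPLE, zfs_ioc_example,
 *	    zfs_secpolicy_read, DATASET_NAME, POOL_CHECK_SUSPENDED,
 *	    B_FALSE, B_TRUE);
 */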
141
142 #include <sys/types.h>
143 #include <sys/param.h>
144 #include <sys/errno.h>
145 #include <sys/uio.h>
146 #include <sys/buf.h>
147 #include <sys/modctl.h>
148 #include <sys/open.h>
149 #include <sys/file.h>
150 #include <sys/kmem.h>
151 #include <sys/conf.h>
152 #include <sys/cmn_err.h>
153 #include <sys/stat.h>
154 #include <sys/zfs_ioctl.h>
155 #include <sys/zfs_vfsops.h>
156 #include <sys/zfs_znode.h>
157 #include <sys/zap.h>
158 #include <sys/spa.h>
159 #include <sys/spa_impl.h>
160 #include <sys/vdev.h>
161 #include <sys/priv_impl.h>
162 #include <sys/autosnap.h>
163 #include <sys/dmu.h>
164 #include <sys/dsl_dir.h>
165 #include <sys/dsl_dataset.h>
166 #include <sys/dsl_prop.h>
167 #include <sys/dsl_deleg.h>
168 #include <sys/dsl_synctask.h>
169 #include <sys/dmu_objset.h>
170 #include <sys/dmu_impl.h>
171 #include <sys/dmu_tx.h>
172 #include <sys/ddi.h>
173 #include <sys/sunddi.h>
174 #include <sys/sunldi.h>
175 #include <sys/policy.h>
176 #include <sys/zone.h>
177 #include <sys/nvpair.h>
178 #include <sys/pathname.h>
179 #include <sys/mount.h>
180 #include <sys/sdt.h>
181 #include <sys/fs/zfs.h>
182 #include <sys/zfs_ctldir.h>
183 #include <sys/zfs_dir.h>
184 #include <sys/zfs_onexit.h>
185 #include <sys/zvol.h>
186 #include <sys/dsl_scan.h>
187 #include <sharefs/share.h>
189 #include <sys/dmu_send.h>
190 #include <sys/dsl_destroy.h>
191 #include <sys/dsl_bookmark.h>
192 #include <sys/dsl_userhold.h>
193 #include <sys/zfeature.h>
194 #include <sys/cos.h>
195 #include <sys/cos_impl.h>
197 #include <sys/sysevent.h>
198 #include <sys/sysevent_impl.h>
199 #include <sys/zcp.h>
200 #include <sys/zio_checksum.h>
201
202 #include "zfs_namecheck.h"
203 #include "zfs_prop.h"
204 #include "zfs_deleg.h"
205 #include "zfs_comutil.h"
206 #include "zfs_errno.h"
207
208 #include "lua.h"
209 #include "lauxlib.h"
210
211 extern struct modlfs zfs_modlfs;
212
213 extern void zfs_init(void);
214 extern void zfs_fini(void);
215
216 ldi_ident_t zfs_li = NULL;
217 dev_info_t *zfs_dip;
218
219 uint_t zfs_fsyncer_key;
220 extern uint_t rrw_tsd_key;
221 static uint_t zfs_allow_log_key;
222
223 typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
224 typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
225 typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);
226
227 typedef enum {
228 NO_NAME,
229 POOL_NAME,
230 DATASET_NAME
231 } zfs_ioc_namecheck_t;
232
233 typedef enum {
234 POOL_CHECK_NONE = 1 << 0,
235 POOL_CHECK_SUSPENDED = 1 << 1,
236 POOL_CHECK_READONLY = 1 << 2,
237 } zfs_ioc_poolcheck_t;
238
239 typedef struct zfs_ioc_vec {
240 zfs_ioc_legacy_func_t *zvec_legacy_func;
241 zfs_ioc_func_t *zvec_func;
242 zfs_secpolicy_func_t *zvec_secpolicy;
243 zfs_ioc_namecheck_t zvec_namecheck;
244 boolean_t zvec_allow_log;
245 zfs_ioc_poolcheck_t zvec_pool_check;
246 boolean_t zvec_smush_outnvlist;
247 const char *zvec_name;
248 } zfs_ioc_vec_t;
249
250 /* This array is indexed by zfs_userquota_prop_t */
251 static const char *userquota_perms[] = {
252 ZFS_DELEG_PERM_USERUSED,
253 ZFS_DELEG_PERM_USERQUOTA,
254 ZFS_DELEG_PERM_GROUPUSED,
255 ZFS_DELEG_PERM_GROUPQUOTA,
256 };
257
258 static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
259 static int zfs_check_settable(const char *name, nvpair_t *property,
260 cred_t *cr);
261 static int zfs_check_clearable(char *dataset, nvlist_t *props,
262 nvlist_t **errors);
263 static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
264 boolean_t *);
265 int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
266 static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
267
268 static int zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature);
269
270 static int
271 zfs_is_wormed_ds(dsl_dataset_t *ds)
272 {
273 char worminfo[13] = {0};
274
275 if (dsl_prop_get_ds(ds, "nms:worm", 1, 12, &worminfo, NULL) == 0 &&
276 worminfo[0] && strcmp(worminfo, "0") != 0 &&
277 strcmp(worminfo, "off") != 0 && strcmp(worminfo, "-") != 0) {
278 return (1);
279 }
280 return (0);
281 }
282
283 static int
284 zfs_is_wormed(const char *name)
285 {
286 char worminfo[13] = {0};
287 char cname[MAXNAMELEN];
288 char *end;
289
290 (void) strlcpy(cname, name, MAXNAMELEN);
291 end = strchr(cname, '@');
292 if (end)
293 *end = 0;
294
295 if (dsl_prop_get(cname, "nms:worm", 1, 12, &worminfo, NULL) == 0 &&
296 worminfo[0] && strcmp(worminfo, "0") != 0 &&
297 strcmp(worminfo, "off") != 0 && strcmp(worminfo, "-") != 0) {
298 return (1);
299 }
300 return (0);
301 }
302
303 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
304 void
305 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
306 {
307 const char *newfile;
308 char buf[512];
309 va_list adx;
310
311 /*
312 * Get rid of annoying "../common/" prefix to filename.
313 */
314 newfile = strrchr(file, '/');
315 if (newfile != NULL) {
316 newfile = newfile + 1; /* Get rid of leading / */
317 } else {
318 newfile = file;
319 }
320
321 va_start(adx, fmt);
322 (void) vsnprintf(buf, sizeof (buf), fmt, adx);
323 va_end(adx);
324
325 /*
326 * To get this data, use the zfs-dprintf probe as so:
327 * dtrace -q -n 'zfs-dprintf \
328 * /stringof(arg0) == "dbuf.c"/ \
329 * {printf("%s: %s", stringof(arg1), stringof(arg3))}'
330 * arg0 = file name
331 * arg1 = function name
332 * arg2 = line number
333 * arg3 = message
334 */
335 DTRACE_PROBE4(zfs__dprintf,
336 char *, newfile, char *, func, int, line, char *, buf);
337 }
338
339 static void
340 history_str_free(char *buf)
341 {
342 kmem_free(buf, HIS_MAX_RECORD_LEN);
343 }
344
345 static char *
346 history_str_get(zfs_cmd_t *zc)
347 {
348 char *buf;
349
350 if (zc->zc_history == NULL)
351 return (NULL);
352
353 buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
354 if (copyinstr((void *)(uintptr_t)zc->zc_history,
355 buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
356 history_str_free(buf);
357 return (NULL);
358 }
359
	buf[HIS_MAX_RECORD_LEN - 1] = '\0';
361
362 return (buf);
363 }
364
365 /*
366 * Check to see if the named dataset is currently defined as bootable
367 */
368 static boolean_t
369 zfs_is_bootfs(const char *name)
370 {
371 objset_t *os;
372
373 if (dmu_objset_hold(name, FTAG, &os) == 0) {
374 boolean_t ret;
375 ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
376 dmu_objset_rele(os, FTAG);
377 return (ret);
378 }
379 return (B_FALSE);
380 }
381
382 /*
383 * Return non-zero if the spa version is less than requested version.
384 */
385 static int
386 zfs_earlier_version(const char *name, int version)
387 {
388 spa_t *spa;
389
390 if (spa_open(name, &spa, FTAG) == 0) {
391 if (spa_version(spa) < version) {
392 spa_close(spa, FTAG);
393 return (1);
394 }
395 spa_close(spa, FTAG);
396 }
397 return (0);
398 }
399
400 /*
401 * Return TRUE if the ZPL version is less than requested version.
402 */
403 static boolean_t
404 zpl_earlier_version(const char *name, int version)
405 {
406 objset_t *os;
407 boolean_t rc = B_TRUE;
408
409 if (dmu_objset_hold(name, FTAG, &os) == 0) {
410 uint64_t zplversion;
411
412 if (dmu_objset_type(os) != DMU_OST_ZFS) {
413 dmu_objset_rele(os, FTAG);
414 return (B_TRUE);
415 }
416 /* XXX reading from non-owned objset */
417 if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
418 rc = zplversion < version;
419 dmu_objset_rele(os, FTAG);
420 }
421 return (rc);
422 }
423
424 static void
425 zfs_log_history(zfs_cmd_t *zc)
426 {
427 spa_t *spa;
428 char *buf;
429
430 if ((buf = history_str_get(zc)) == NULL)
431 return;
432
433 if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
434 if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
435 (void) spa_history_log(spa, buf);
436 spa_close(spa, FTAG);
437 }
438 history_str_free(buf);
439 }
440
441 /*
442 * Policy for top-level read operations (list pools). Requires no privileges,
443 * and can be used in the local zone, as there is no associated dataset.
444 */
445 /* ARGSUSED */
446 static int
447 zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
448 {
449 return (0);
450 }
451
452 /*
453 * Policy for dataset read operations (list children, get statistics). Requires
454 * no privileges, but must be visible in the local zone.
455 */
456 /* ARGSUSED */
457 static int
458 zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
459 {
460 if (INGLOBALZONE(curproc) ||
461 zone_dataset_visible(zc->zc_name, NULL))
462 return (0);
463
464 return (SET_ERROR(ENOENT));
465 }
466
467 static int
468 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
469 {
470 int writable = 1;
471
472 /*
473 * The dataset must be visible by this zone -- check this first
474 * so they don't see EPERM on something they shouldn't know about.
475 */
476 if (!INGLOBALZONE(curproc) &&
477 !zone_dataset_visible(dataset, &writable))
478 return (SET_ERROR(ENOENT));
479
480 if (INGLOBALZONE(curproc)) {
481 /*
482 * If the fs is zoned, only root can access it from the
483 * global zone.
484 */
485 if (secpolicy_zfs(cr) && zoned)
486 return (SET_ERROR(EPERM));
487 } else {
488 /*
489 * If we are in a local zone, the 'zoned' property must be set.
490 */
491 if (!zoned)
492 return (SET_ERROR(EPERM));
493
494 /* must be writable by this zone */
495 if (!writable)
496 return (SET_ERROR(EPERM));
497 }
498 return (0);
499 }
500
501 static int
502 zfs_dozonecheck(const char *dataset, cred_t *cr)
503 {
504 uint64_t zoned;
505
506 if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
507 return (SET_ERROR(ENOENT));
508
509 return (zfs_dozonecheck_impl(dataset, zoned, cr));
510 }
511
512 static int
513 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
514 {
515 uint64_t zoned;
516
517 if (dsl_prop_get_int_ds(ds, "zoned", &zoned))
518 return (SET_ERROR(ENOENT));
519
520 return (zfs_dozonecheck_impl(dataset, zoned, cr));
521 }
522
523 static int
524 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
525 const char *perm, cred_t *cr)
526 {
527 int error;
528
529 error = zfs_dozonecheck_ds(name, ds, cr);
530 if (error == 0) {
531 error = secpolicy_zfs(cr);
532 if (error != 0)
533 error = dsl_deleg_access_impl(ds, perm, cr);
534 }
535 return (error);
536 }
537
538 static int
539 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
540 {
541 int error;
542 dsl_dataset_t *ds;
543 dsl_pool_t *dp;
544
545 /*
546 * First do a quick check for root in the global zone, which
547 * is allowed to do all write_perms. This ensures that zfs_ioc_*
548 * will get to handle nonexistent datasets.
549 */
550 if (INGLOBALZONE(curproc) && secpolicy_zfs(cr) == 0)
551 return (0);
552
553 error = dsl_pool_hold(name, FTAG, &dp);
554 if (error != 0)
555 return (error);
556
557 error = dsl_dataset_hold(dp, name, FTAG, &ds);
558 if (error != 0) {
559 dsl_pool_rele(dp, FTAG);
560 return (error);
561 }
562
563 error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
564
565 dsl_dataset_rele(ds, FTAG);
566 dsl_pool_rele(dp, FTAG);
567 return (error);
568 }
569
570 /*
571 * Policy for setting the security label property.
572 *
573 * Returns 0 for success, non-zero for access and other errors.
574 */
575 static int
576 zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
577 {
578 char ds_hexsl[MAXNAMELEN];
579 bslabel_t ds_sl, new_sl;
580 boolean_t new_default = FALSE;
581 uint64_t zoned;
582 int needed_priv = -1;
583 int error;
584
585 /* First get the existing dataset label. */
586 error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
587 1, sizeof (ds_hexsl), &ds_hexsl, NULL);
588 if (error != 0)
589 return (SET_ERROR(EPERM));
590
591 if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
592 new_default = TRUE;
593
594 /* The label must be translatable */
595 if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
596 return (SET_ERROR(EINVAL));
597
598 /*
599 * In a non-global zone, disallow attempts to set a label that
600 * doesn't match that of the zone; otherwise no other checks
601 * are needed.
602 */
603 if (!INGLOBALZONE(curproc)) {
604 if (new_default || !blequal(&new_sl, CR_SL(CRED())))
605 return (SET_ERROR(EPERM));
606 return (0);
607 }
608
609 /*
610 * For global-zone datasets (i.e., those whose zoned property is
	 * "off"), verify that the specified new label is valid for the
612 * global zone.
613 */
614 if (dsl_prop_get_integer(name,
615 zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
616 return (SET_ERROR(EPERM));
617 if (!zoned) {
618 if (zfs_check_global_label(name, strval) != 0)
619 return (SET_ERROR(EPERM));
620 }
621
622 /*
623 * If the existing dataset label is nondefault, check if the
624 * dataset is mounted (label cannot be changed while mounted).
625 * Get the zfsvfs; if there isn't one, then the dataset isn't
626 * mounted (or isn't a dataset, doesn't exist, ...).
627 */
628 if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
629 objset_t *os;
630 static char *setsl_tag = "setsl_tag";
631
632 /*
633 * Try to own the dataset; abort if there is any error,
634 * (e.g., already mounted, in use, or other error).
635 */
636 error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
637 setsl_tag, &os);
638 if (error != 0)
639 return (SET_ERROR(EPERM));
640
641 dmu_objset_disown(os, setsl_tag);
642
643 if (new_default) {
644 needed_priv = PRIV_FILE_DOWNGRADE_SL;
645 goto out_check;
646 }
647
648 if (hexstr_to_label(strval, &new_sl) != 0)
649 return (SET_ERROR(EPERM));
650
651 if (blstrictdom(&ds_sl, &new_sl))
652 needed_priv = PRIV_FILE_DOWNGRADE_SL;
653 else if (blstrictdom(&new_sl, &ds_sl))
654 needed_priv = PRIV_FILE_UPGRADE_SL;
655 } else {
656 /* dataset currently has a default label */
657 if (!new_default)
658 needed_priv = PRIV_FILE_UPGRADE_SL;
659 }
660
661 out_check:
662 if (needed_priv != -1)
663 return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
664 return (0);
665 }
666
667 static int
668 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
669 cred_t *cr)
670 {
671 char *strval;
672
673 /*
674 * Check permissions for special properties.
675 */
676 switch (prop) {
677 case ZFS_PROP_ZONED:
678 /*
679 * Disallow setting of 'zoned' from within a local zone.
680 */
681 if (!INGLOBALZONE(curproc))
682 return (SET_ERROR(EPERM));
683 break;
684
685 case ZFS_PROP_QUOTA:
686 case ZFS_PROP_FILESYSTEM_LIMIT:
687 case ZFS_PROP_SNAPSHOT_LIMIT:
688 if (!INGLOBALZONE(curproc)) {
689 uint64_t zoned;
690 char setpoint[ZFS_MAX_DATASET_NAME_LEN];
691 /*
692 * Unprivileged users are allowed to modify the
			 * limit on things *under* (i.e., contained by)
694 * the thing they own.
695 */
696 if (dsl_prop_get_integer(dsname, "zoned", &zoned,
697 setpoint))
698 return (SET_ERROR(EPERM));
699 if (!zoned || strlen(dsname) <= strlen(setpoint))
700 return (SET_ERROR(EPERM));
701 }
702 break;
703
704 case ZFS_PROP_MLSLABEL:
705 if (!is_system_labeled())
706 return (SET_ERROR(EPERM));
707
708 if (nvpair_value_string(propval, &strval) == 0) {
709 int err;
710
711 err = zfs_set_slabel_policy(dsname, strval, CRED());
712 if (err != 0)
713 return (err);
714 }
715 break;
716 }
717
718 return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
719 }
720
721 /* ARGSUSED */
722 static int
723 zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
724 {
725 int error;
726
727 error = zfs_dozonecheck(zc->zc_name, cr);
728 if (error != 0)
729 return (error);
730
731 /*
732 * permission to set permissions will be evaluated later in
733 * dsl_deleg_can_allow()
734 */
735 return (0);
736 }
737
738 /* ARGSUSED */
739 static int
740 zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
741 {
742 return (zfs_secpolicy_write_perms(zc->zc_name,
743 ZFS_DELEG_PERM_ROLLBACK, cr));
744 }
745
746 /* ARGSUSED */
747 static int
748 zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
749 {
750 dsl_pool_t *dp;
751 dsl_dataset_t *ds;
752 char *cp;
753 int error;
754
755 /*
756 * Generate the current snapshot name from the given objsetid, then
757 * use that name for the secpolicy/zone checks.
758 */
759 cp = strchr(zc->zc_name, '@');
760 if (cp == NULL)
761 return (SET_ERROR(EINVAL));
762 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
763 if (error != 0)
764 return (error);
765
766 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
767 if (error != 0) {
768 dsl_pool_rele(dp, FTAG);
769 return (error);
770 }
771
772 dsl_dataset_name(ds, zc->zc_name);
773
774 error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
775 ZFS_DELEG_PERM_SEND, cr);
776 dsl_dataset_rele(ds, FTAG);
777 dsl_pool_rele(dp, FTAG);
778
779 return (error);
780 }
781
782 /* ARGSUSED */
783 static int
784 zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
785 {
786 return (zfs_secpolicy_write_perms(zc->zc_name,
787 ZFS_DELEG_PERM_SEND, cr));
788 }
789
790 /* ARGSUSED */
791 static int
792 zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
793 {
794 vnode_t *vp;
795 int error;
796
797 if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
798 NO_FOLLOW, NULL, &vp)) != 0)
799 return (error);
800
801 /* Now make sure mntpnt and dataset are ZFS */
802
803 if (vp->v_vfsp->vfs_fstype != zfsfstype ||
804 (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
805 zc->zc_name) != 0)) {
806 VN_RELE(vp);
807 return (SET_ERROR(EPERM));
808 }
809
810 VN_RELE(vp);
811 return (dsl_deleg_access(zc->zc_name,
812 ZFS_DELEG_PERM_SHARE, cr));
813 }
814
815 int
816 zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
817 {
818 if (secpolicy_nfs(cr) == 0) {
819 return (0);
820 } else {
821 return (zfs_secpolicy_deleg_share(zc, innvl, cr));
822 }
823 }
824
825 int
826 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
827 {
828 if (secpolicy_smb(cr) == 0) {
829 return (0);
830 } else {
831 return (zfs_secpolicy_deleg_share(zc, innvl, cr));
832 }
833 }
834
835 static int
836 zfs_get_parent(const char *datasetname, char *parent, int parentsize)
837 {
838 char *cp;
839
840 /*
841 * Remove the @bla or /bla from the end of the name to get the parent.
842 */
843 (void) strncpy(parent, datasetname, parentsize);
844 cp = strrchr(parent, '@');
845 if (cp != NULL) {
846 cp[0] = '\0';
847 } else {
848 cp = strrchr(parent, '/');
849 if (cp == NULL)
850 return (SET_ERROR(ENOENT));
851 cp[0] = '\0';
852 }
853
854 return (0);
855 }
856
857 int
858 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
859 {
860 int error;
861
862 if ((error = zfs_secpolicy_write_perms(name,
863 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
864 return (error);
865
866 return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
867 }
868
869 /* ARGSUSED */
870 static int
871 zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
872 {
873 return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
874 }
875
876 /*
877 * Destroying snapshots with delegated permissions requires
878 * descendant mount and destroy permissions.
879 */
880 /* ARGSUSED */
881 static int
882 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
883 {
884 nvlist_t *snaps;
885 nvpair_t *pair, *nextpair;
886 int error = 0;
887
888 if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
889 return (SET_ERROR(EINVAL));
890 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
891 pair = nextpair) {
892 nextpair = nvlist_next_nvpair(snaps, pair);
893 error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
894 if (error == ENOENT) {
895 /*
896 * Ignore any snapshots that don't exist (we consider
897 * them "already destroyed"). Remove the name from the
898 * nvl here in case the snapshot is created between
899 * now and when we try to destroy it (in which case
900 * we don't want to destroy it since we haven't
901 * checked for permission).
902 */
903 fnvlist_remove_nvpair(snaps, pair);
904 error = 0;
905 }
906 if (error != 0)
907 break;
908 }
909
910 return (error);
911 }
912
913 int
914 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
915 {
916 char parentname[ZFS_MAX_DATASET_NAME_LEN];
917 int error;
918
919 if ((error = zfs_secpolicy_write_perms(from,
920 ZFS_DELEG_PERM_RENAME, cr)) != 0)
921 return (error);
922
923 if ((error = zfs_secpolicy_write_perms(from,
924 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
925 return (error);
926
927 if ((error = zfs_get_parent(to, parentname,
928 sizeof (parentname))) != 0)
929 return (error);
930
931 if ((error = zfs_secpolicy_write_perms(parentname,
932 ZFS_DELEG_PERM_CREATE, cr)) != 0)
933 return (error);
934
935 if ((error = zfs_secpolicy_write_perms(parentname,
936 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
937 return (error);
938
939 return (error);
940 }
941
942 /* ARGSUSED */
943 static int
944 zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
945 {
946 return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
947 }
948
949 /* ARGSUSED */
950 static int
951 zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
952 {
953 dsl_pool_t *dp;
954 dsl_dataset_t *clone;
955 int error;
956
957 error = zfs_secpolicy_write_perms(zc->zc_name,
958 ZFS_DELEG_PERM_PROMOTE, cr);
959 if (error != 0)
960 return (error);
961
962 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
963 if (error != 0)
964 return (error);
965
966 error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
967
968 if (error == 0) {
969 char parentname[ZFS_MAX_DATASET_NAME_LEN];
970 dsl_dataset_t *origin = NULL;
971 dsl_dir_t *dd;
972 dd = clone->ds_dir;
973
974 error = dsl_dataset_hold_obj(dd->dd_pool,
975 dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
976 if (error != 0) {
977 dsl_dataset_rele(clone, FTAG);
978 dsl_pool_rele(dp, FTAG);
979 return (error);
980 }
981
982 error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
983 ZFS_DELEG_PERM_MOUNT, cr);
984
985 dsl_dataset_name(origin, parentname);
986 if (error == 0) {
987 error = zfs_secpolicy_write_perms_ds(parentname, origin,
988 ZFS_DELEG_PERM_PROMOTE, cr);
989 }
990 dsl_dataset_rele(clone, FTAG);
991 dsl_dataset_rele(origin, FTAG);
992 }
993 dsl_pool_rele(dp, FTAG);
994 return (error);
995 }
996
997 /* ARGSUSED */
998 static int
999 zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1000 {
1001 int error;
1002
1003 if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1004 ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
1005 return (error);
1006
1007 if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1008 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
1009 return (error);
1010
1011 return (zfs_secpolicy_write_perms(zc->zc_name,
1012 ZFS_DELEG_PERM_CREATE, cr));
1013 }
1014
1015 int
1016 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
1017 {
1018 return (zfs_secpolicy_write_perms(name,
1019 ZFS_DELEG_PERM_SNAPSHOT, cr));
1020 }
1021
1022 /*
1023 * Check for permission to create each snapshot in the nvlist.
1024 */
1025 /* ARGSUSED */
1026 static int
1027 zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1028 {
1029 nvlist_t *snaps;
1030 int error = 0;
1031 nvpair_t *pair;
1032
1033 if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
1034 return (SET_ERROR(EINVAL));
1035 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
1036 pair = nvlist_next_nvpair(snaps, pair)) {
1037 char *name = nvpair_name(pair);
1038 char *atp = strchr(name, '@');
1039
1040 if (atp == NULL) {
1041 error = SET_ERROR(EINVAL);
1042 break;
1043 }
1044 *atp = '\0';
1045 error = zfs_secpolicy_snapshot_perms(name, cr);
1046 *atp = '@';
1047 if (error != 0)
1048 break;
1049 }
1050 return (error);
1051 }
1052
1053 /*
 * Check for permission to create each bookmark in the nvlist.
1055 */
1056 /* ARGSUSED */
1057 static int
1058 zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1059 {
1060 int error = 0;
1061
1062 for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
1063 pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
1064 char *name = nvpair_name(pair);
1065 char *hashp = strchr(name, '#');
1066
1067 if (hashp == NULL) {
1068 error = SET_ERROR(EINVAL);
1069 break;
1070 }
1071 *hashp = '\0';
1072 error = zfs_secpolicy_write_perms(name,
1073 ZFS_DELEG_PERM_BOOKMARK, cr);
1074 *hashp = '#';
1075 if (error != 0)
1076 break;
1077 }
1078 return (error);
1079 }
1080
1081 /* ARGSUSED */
1082 static int
1083 zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1084 {
1085 nvpair_t *pair, *nextpair;
1086 int error = 0;
1087
1088 for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1089 pair = nextpair) {
1090 char *name = nvpair_name(pair);
1091 char *hashp = strchr(name, '#');
1092 nextpair = nvlist_next_nvpair(innvl, pair);
1093
1094 if (hashp == NULL) {
1095 error = SET_ERROR(EINVAL);
1096 break;
1097 }
1098
1099 *hashp = '\0';
1100 error = zfs_secpolicy_write_perms(name,
1101 ZFS_DELEG_PERM_DESTROY, cr);
1102 *hashp = '#';
1103 if (error == ENOENT) {
1104 /*
1105 * Ignore any filesystems that don't exist (we consider
1106 * their bookmarks "already destroyed"). Remove
1107 * the name from the nvl here in case the filesystem
1108 * is created between now and when we try to destroy
1109 * the bookmark (in which case we don't want to
1110 * destroy it since we haven't checked for permission).
1111 */
1112 fnvlist_remove_nvpair(innvl, pair);
1113 error = 0;
1114 }
1115 if (error != 0)
1116 break;
1117 }
1118
1119 return (error);
1120 }
1121
1122 /* ARGSUSED */
1123 static int
1124 zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1125 {
1126 /*
1127 * Even root must have a proper TSD so that we know what pool
1128 * to log to.
1129 */
1130 if (tsd_get(zfs_allow_log_key) == NULL)
1131 return (SET_ERROR(EPERM));
1132 return (0);
1133 }
1134
1135 static int
1136 zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1137 {
1138 char parentname[ZFS_MAX_DATASET_NAME_LEN];
1139 int error;
1140 char *origin;
1141
1142 if ((error = zfs_get_parent(zc->zc_name, parentname,
1143 sizeof (parentname))) != 0)
1144 return (error);
1145
1146 if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
1147 (error = zfs_secpolicy_write_perms(origin,
1148 ZFS_DELEG_PERM_CLONE, cr)) != 0)
1149 return (error);
1150
1151 if ((error = zfs_secpolicy_write_perms(parentname,
1152 ZFS_DELEG_PERM_CREATE, cr)) != 0)
1153 return (error);
1154
1155 return (zfs_secpolicy_write_perms(parentname,
1156 ZFS_DELEG_PERM_MOUNT, cr));
1157 }
1158
1159 /*
1160 * Policy for pool operations - create/destroy pools, add vdevs, etc. Requires
1161 * SYS_CONFIG privilege, which is not available in a local zone.
1162 */
1163 /* ARGSUSED */
1164 static int
1165 zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1166 {
1167 if (secpolicy_sys_config(cr, B_FALSE) != 0)
1168 return (SET_ERROR(EPERM));
1169
1170 return (0);
1171 }
1172
1173 /*
1174 * Policy for object to name lookups.
1175 */
1176 /* ARGSUSED */
1177 static int
1178 zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1179 {
1180 int error;
1181
1182 if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
1183 return (0);
1184
1185 error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1186 return (error);
1187 }
1188
1189 /*
1190 * Policy for fault injection. Requires all privileges.
1191 */
1192 /* ARGSUSED */
1193 static int
1194 zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1195 {
1196 return (secpolicy_zinject(cr));
1197 }
1198
1199 /* ARGSUSED */
1200 static int
1201 zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1202 {
1203 zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1204
1205 if (prop == ZPROP_INVAL) {
1206 if (!zfs_prop_user(zc->zc_value))
1207 return (SET_ERROR(EINVAL));
1208 return (zfs_secpolicy_write_perms(zc->zc_name,
1209 ZFS_DELEG_PERM_USERPROP, cr));
1210 } else {
1211 return (zfs_secpolicy_setprop(zc->zc_name, prop,
1212 NULL, cr));
1213 }
1214 }
1215
1216 static int
1217 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1218 {
1219 int err = zfs_secpolicy_read(zc, innvl, cr);
1220 if (err)
1221 return (err);
1222
1223 if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1224 return (SET_ERROR(EINVAL));
1225
1226 if (zc->zc_value[0] == 0) {
1227 /*
		 * They are asking about a posix uid/gid.  If it's
		 * their own, allow it.
1230 */
1231 if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1232 zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
1233 if (zc->zc_guid == crgetuid(cr))
1234 return (0);
1235 } else {
1236 if (groupmember(zc->zc_guid, cr))
1237 return (0);
1238 }
1239 }
1240
1241 return (zfs_secpolicy_write_perms(zc->zc_name,
1242 userquota_perms[zc->zc_objset_type], cr));
1243 }
1244
1245 static int
1246 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1247 {
1248 int err = zfs_secpolicy_read(zc, innvl, cr);
1249 if (err)
1250 return (err);
1251
1252 if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1253 return (SET_ERROR(EINVAL));
1254
1255 return (zfs_secpolicy_write_perms(zc->zc_name,
1256 userquota_perms[zc->zc_objset_type], cr));
1257 }
1258
1259 /* ARGSUSED */
1260 static int
1261 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1262 {
1263 return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1264 NULL, cr));
1265 }
1266
1267 /* ARGSUSED */
1268 static int
1269 zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1270 {
1271 nvpair_t *pair;
1272 nvlist_t *holds;
1273 int error;
1274
1275 error = nvlist_lookup_nvlist(innvl, "holds", &holds);
1276 if (error != 0)
1277 return (SET_ERROR(EINVAL));
1278
1279 for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1280 pair = nvlist_next_nvpair(holds, pair)) {
1281 char fsname[ZFS_MAX_DATASET_NAME_LEN];
1282 error = dmu_fsname(nvpair_name(pair), fsname);
1283 if (error != 0)
1284 return (error);
1285 error = zfs_secpolicy_write_perms(fsname,
1286 ZFS_DELEG_PERM_HOLD, cr);
1287 if (error != 0)
1288 return (error);
1289 }
1290 return (0);
1291 }
1292
1293 /* ARGSUSED */
1294 static int
1295 zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1296 {
1297 nvpair_t *pair;
1298 int error;
1299
1300 for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1301 pair = nvlist_next_nvpair(innvl, pair)) {
1302 char fsname[ZFS_MAX_DATASET_NAME_LEN];
1303 error = dmu_fsname(nvpair_name(pair), fsname);
1304 if (error != 0)
1305 return (error);
1306 error = zfs_secpolicy_write_perms(fsname,
1307 ZFS_DELEG_PERM_RELEASE, cr);
1308 if (error != 0)
1309 return (error);
1310 }
1311 return (0);
1312 }
1313
1314 /*
1315 * Policy for allowing temporary snapshots to be taken or released
1316 */
1317 static int
1318 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1319 {
1320 /*
1321 * A temporary snapshot is the same as a snapshot,
1322 * hold, destroy and release all rolled into one.
	 * Delegated diff alone is sufficient to allow this.
1324 */
1325 int error;
1326
1327 if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1328 ZFS_DELEG_PERM_DIFF, cr)) == 0)
1329 return (0);
1330
1331 error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1332 if (error == 0)
1333 error = zfs_secpolicy_hold(zc, innvl, cr);
1334 if (error == 0)
1335 error = zfs_secpolicy_release(zc, innvl, cr);
1336 if (error == 0)
1337 error = zfs_secpolicy_destroy(zc, innvl, cr);
1338 return (error);
1339 }
1340
1341 /*
1342 * Returns the nvlist as specified by the user in the zfs_cmd_t.
1343 */
1344 static int
1345 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1346 {
1347 char *packed;
1348 int error;
1349 nvlist_t *list = NULL;
1350
1351 /*
1352 * Read in and unpack the user-supplied nvlist.
1353 */
1354 if (size == 0)
1355 return (SET_ERROR(EINVAL));
1356
1357 packed = kmem_alloc(size, KM_SLEEP);
1358
1359 if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1360 iflag)) != 0) {
1361 kmem_free(packed, size);
1362 return (SET_ERROR(EFAULT));
1363 }
1364
1365 if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1366 kmem_free(packed, size);
1367 return (error);
1368 }
1369
1370 kmem_free(packed, size);
1371
1372 *nvp = list;
1373 return (0);
1374 }
1375
1376 /*
1377 * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1378 * Entries will be removed from the end of the nvlist, and one int32 entry
1379 * named "N_MORE_ERRORS" will be added indicating how many entries were
1380 * removed.
1381 */
1382 static int
1383 nvlist_smush(nvlist_t *errors, size_t max)
1384 {
1385 size_t size;
1386
1387 size = fnvlist_size(errors);
1388
1389 if (size > max) {
1390 nvpair_t *more_errors;
1391 int n = 0;
1392
1393 if (max < 1024)
1394 return (SET_ERROR(ENOMEM));
1395
1396 fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1397 more_errors = nvlist_prev_nvpair(errors, NULL);
1398
1399 do {
1400 nvpair_t *pair = nvlist_prev_nvpair(errors,
1401 more_errors);
1402 fnvlist_remove_nvpair(errors, pair);
1403 n++;
1404 size = fnvlist_size(errors);
1405 } while (size > max);
1406
1407 fnvlist_remove_nvpair(errors, more_errors);
1408 fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1409 ASSERT3U(fnvlist_size(errors), <=, max);
1410 }
1411
1412 return (0);
1413 }
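/*
 * Rough illustration of the effect (hypothetical contents): an errors
 * nvlist such as
 *
 *	{ "pool/fs@snap001" = EBUSY, "pool/fs@snap002" = EBUSY, ... }
 *
 * that packs larger than 'max' bytes comes back trimmed from the end,
 * for example
 *
 *	{ "pool/fs@snap001" = EBUSY, ..., "N_MORE_ERRORS" = 57 }
 *
 * so the caller still sees the leading errors plus a count of how many
 * entries were dropped.
 */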
1414
/*
 * Callers will know whether there is anything to unpack based on a nonzero
 * return value (errno set to ENOMEM), but observers (e.g. truss) need the
 * message properly marked to know whether it should be unpacked and
 * displayed.  Don't mark it as filled unless the copyout is completely
 * successful.  zc_nvlist_dst_size is always set to the packed size of the
 * nvlist so that the caller can use it as a resize hint.
 */
1422 static int
1423 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1424 {
1425 char *packed = NULL;
1426 int error = 0;
1427 size_t size;
1428
1429 size = fnvlist_size(nvl);
1430
1431 zc->zc_nvlist_dst_filled = B_FALSE;
1432 if (size > zc->zc_nvlist_dst_size) {
1433 error = SET_ERROR(ENOMEM);
1434 } else {
1435 packed = fnvlist_pack(nvl, &size);
1436 if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1437 size, zc->zc_iflags) != 0)
1438 error = SET_ERROR(EFAULT);
1439 else
1440 zc->zc_nvlist_dst_filled = B_TRUE;
1441 fnvlist_pack_free(packed, size);
1442 }
1443
1444 zc->zc_nvlist_dst_size = size;
1445 return (error);
1446 }
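/*
 * Sketch of the matching userland pattern (an assumption about how the
 * libzfs side behaves; grow_dst_buffer() below is a hypothetical helper):
 * when the ioctl fails with ENOMEM because smushing was disabled or could
 * not shrink the output enough, the caller grows its destination buffer
 * using the zc_nvlist_dst_size hint and retries.
 *
 *	while (ioctl(fd, request, &zc) != 0 && errno == ENOMEM)
 *		grow_dst_buffer(&zc, zc.zc_nvlist_dst_size);
 */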
1447
1448 static int
1449 getzfsvfs_from_ds(dsl_dataset_t *ds, zfsvfs_t **zfvp)
1450 {
1451 objset_t *os;
1452 int error;
1453 dsl_pool_t *dp;
1454
1455 dp = ds->ds_dir->dd_pool;
1456 dsl_pool_config_enter(dp, FTAG);
1457
1458 /*
1459 * IU: we probably need to hold dataset here.
1460 * For now let's assume we do.
1461 * May need revision later.
1462 */
1463 dsl_dataset_long_hold(ds, FTAG);
	error = dmu_objset_from_ds(ds, &os);
	if (error != 0) {
		dsl_dataset_long_rele(ds, FTAG);
		dsl_pool_config_exit(dp, FTAG);
		return (error);
	}
	if (dmu_objset_type(os) != DMU_OST_ZFS) {
		dsl_dataset_long_rele(ds, FTAG);
		dsl_pool_config_exit(dp, FTAG);
		return (SET_ERROR(EINVAL));
	}
1470
1471 mutex_enter(&os->os_user_ptr_lock);
1472 *zfvp = dmu_objset_get_user(os);
1473 if (*zfvp) {
1474 VFS_HOLD((*zfvp)->z_vfs);
1475 } else {
1476 error = ESRCH;
1477 }
1478 mutex_exit(&os->os_user_ptr_lock);
1479 dsl_dataset_long_rele(ds, FTAG);
1480 dsl_pool_config_exit(dp, FTAG);
1481 return (error);
1482 }
1483
1484 int
1485 getzfsvfs_impl(objset_t *os, zfsvfs_t **zfvp)
1486 {
1487 int error = 0;
1488 if (dmu_objset_type(os) != DMU_OST_ZFS) {
1489 return (SET_ERROR(EINVAL));
1490 }
1491
1492 mutex_enter(&os->os_user_ptr_lock);
1493 *zfvp = dmu_objset_get_user(os);
1494 if (*zfvp) {
1495 VFS_HOLD((*zfvp)->z_vfs);
1496 } else {
1497 error = SET_ERROR(ESRCH);
1498 }
1499 mutex_exit(&os->os_user_ptr_lock);
1500 return (error);
1501 }
1502
1503 int
1504 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1505 {
1506 objset_t *os;
1507 int error;
1508
1509 error = dmu_objset_hold(dsname, FTAG, &os);
1510 if (error != 0)
1511 return (error);
1512
1513 error = getzfsvfs_impl(os, zfvp);
1514 dmu_objset_rele(os, FTAG);
1515 return (error);
1516 }
1517
1518 /*
1519 * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1520 * case its z_vfs will be NULL, and it will be opened as the owner.
1521 * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1522 * which prevents all vnode ops from running.
1523 */
1524 static int
1525 zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1526 {
1527 int error = 0;
1528
1529 if (getzfsvfs(name, zfvp) != 0)
1530 error = zfsvfs_create(name, zfvp);
1531 if (error == 0) {
1532 rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
1533 RW_READER, tag);
1534 if ((*zfvp)->z_unmounted) {
1535 /*
1536 * XXX we could probably try again, since the unmounting
1537 * thread should be just about to disassociate the
1538 * objset from the zfsvfs.
1539 */
1540 rrm_exit(&(*zfvp)->z_teardown_lock, tag);
1541 return (SET_ERROR(EBUSY));
1542 }
1543 }
1544 return (error);
1545 }
1546
1547 static void
1548 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1549 {
1550 rrm_exit(&zfsvfs->z_teardown_lock, tag);
1551
1552 if (zfsvfs->z_vfs) {
1553 VFS_RELE(zfsvfs->z_vfs);
1554 } else {
1555 dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1556 zfsvfs_free(zfsvfs);
1557 }
1558 }
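/*
 * Typical usage of the hold/rele pair above (sketch only; do_work() is a
 * placeholder, not a function defined in this file):
 *
 *	zfsvfs_t *zfsvfs;
 *	int error;
 *
 *	error = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE);
 *	if (error == 0) {
 *		error = do_work(zfsvfs);
 *		zfsvfs_rele(zfsvfs, FTAG);
 *	}
 */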
1559
1560
1561 /*
1562 * Publish events using GPEC subsystem
1563 */
1564
1565 static evchan_t *zfs_channel = NULL;
1566
1567 void
1568 zfs_event_post(const char *subclass, const char *operation, nvlist_t *ev_data)
1569 {
1570
1571 if (zfs_channel == NULL)
1572 goto out;
1573
1574 fnvlist_add_string(ev_data, "operation", operation);
1575
1576 (void) sysevent_evc_publish(zfs_channel, subclass, operation,
1577 "com.nexenta", "zfs-kernel", ev_data, EVCH_NOSLEEP);
1578
1579 out:
1580 fnvlist_free(ev_data);
1581 }
1582
1583 static int
1584 zfs_ioc_pool_create(zfs_cmd_t *zc)
1585 {
1586 int error;
1587 nvlist_t *config, *props = NULL;
1588 nvlist_t *rootprops = NULL;
1589 nvlist_t *zplprops = NULL;
1590 nvlist_t *event;
1591
1592 if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1593 zc->zc_iflags, &config))
1594 return (error);
1595
1596 if (zc->zc_nvlist_src_size != 0 && (error =
1597 get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1598 zc->zc_iflags, &props))) {
1599 nvlist_free(config);
1600 return (error);
1601 }
1602
1603 if (props) {
1604 nvlist_t *nvl = NULL;
1605 uint64_t version = SPA_VERSION;
1606
1607 (void) nvlist_lookup_uint64(props,
1608 zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1609 if (!SPA_VERSION_IS_SUPPORTED(version)) {
1610 error = SET_ERROR(EINVAL);
1611 goto pool_props_bad;
1612 }
1613 (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1614 if (nvl) {
1615 error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1616 if (error != 0) {
1617 nvlist_free(config);
1618 nvlist_free(props);
1619 return (error);
1620 }
1621 (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1622 }
1623 VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1624 error = zfs_fill_zplprops_root(version, rootprops,
1625 zplprops, NULL);
1626 if (error != 0)
1627 goto pool_props_bad;
1628 }
1629
1630 error = spa_create(zc->zc_name, config, props, zplprops);
1631
1632 /*
1633 * Set the remaining root properties
1634 */
1635 if (!error && (error = zfs_set_prop_nvlist(zc->zc_name,
1636 ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1637 (void) spa_destroy(zc->zc_name);
1638
1639 if (error == 0) {
1640 event = fnvlist_alloc();
1641 fnvlist_add_string(event, "name", zc->zc_name);
1642 fnvlist_add_nvlist(event, "config", config);
1643 if (props != NULL)
1644 fnvlist_add_nvlist(event, "props", props);
1645 zfs_event_post(ZPOOL_EC_STATUS, "create", event);
1646 }
1647
1648 pool_props_bad:
1649 nvlist_free(rootprops);
1650 nvlist_free(zplprops);
1651 nvlist_free(config);
1652 nvlist_free(props);
1653
1654 return (error);
1655 }
1656
1657 static int
1658 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1659 {
1660 int error;
1661 nvlist_t *event;
1662 zfs_log_history(zc);
1663 error = spa_destroy(zc->zc_name);
1664 if (error == 0) {
1665 zvol_remove_minors(zc->zc_name);
1666 event = fnvlist_alloc();
1667 fnvlist_add_string(event, "pool", zc->zc_name);
1668 zfs_event_post(ZPOOL_EC_STATUS, "destroy", event);
1669 }
1670 return (error);
1671 }
1672
1673 static int
1674 zfs_ioc_pool_import(zfs_cmd_t *zc)
1675 {
1676 nvlist_t *config, *props = NULL;
1677 uint64_t guid;
1678 int error;
1679 nvlist_t *event;
1680
1681 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1682 zc->zc_iflags, &config)) != 0)
1683 return (error);
1684
1685 if (zc->zc_nvlist_src_size != 0 && (error =
1686 get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1687 zc->zc_iflags, &props))) {
1688 nvlist_free(config);
1689 return (error);
1690 }
1691
1692 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1693 guid != zc->zc_guid)
1694 error = SET_ERROR(EINVAL);
1695 else
1696 error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1697
1698 if (error == 0) {
1699 event = fnvlist_alloc();
1700 fnvlist_add_string(event, "pool", zc->zc_name);
1701 fnvlist_add_uint64(event, "guid", zc->zc_guid);
1702 fnvlist_add_nvlist(event, "config", config);
1703 if (props != NULL)
1704 fnvlist_add_nvlist(event, "props", props);
1705 zfs_event_post(ZPOOL_EC_STATUS, "import", event);
1706 }
1707
1708 if (zc->zc_nvlist_dst != 0) {
1709 int err;
1710
1711 if ((err = put_nvlist(zc, config)) != 0)
1712 error = err;
1713 }
1714
1715 nvlist_free(config);
1716
1717 nvlist_free(props);
1718
1719 return (error);
1720 }
1721
1722 static int
1723 zfs_ioc_pool_export(zfs_cmd_t *zc)
1724 {
1725 int error;
1726 boolean_t force = (boolean_t)zc->zc_cookie;
1727 boolean_t hardforce = (boolean_t)zc->zc_guid;
1728 boolean_t saveconfig = (boolean_t)zc->zc_obj;
1729 nvlist_t *event;
1730
1731 zfs_log_history(zc);
1732 error = spa_export(zc->zc_name, NULL, force, hardforce, saveconfig);
1733 if (error == 0) {
1734 zvol_remove_minors(zc->zc_name);
1735 event = fnvlist_alloc();
1736 fnvlist_add_string(event, "pool", zc->zc_name);
1737 zfs_event_post(ZPOOL_EC_STATUS, "export", event);
1738 }
1739 return (error);
1740 }
1741
1742 static int
1743 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1744 {
1745 nvlist_t *configs;
1746 int error;
1747
1748 if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1749 return (SET_ERROR(EEXIST));
1750
1751 error = put_nvlist(zc, configs);
1752
1753 nvlist_free(configs);
1754
1755 return (error);
1756 }
1757
1758 /*
1759 * inputs:
1760 * zc_name name of the pool
1761 *
1762 * outputs:
1763 * zc_cookie real errno
1764 * zc_nvlist_dst config nvlist
1765 * zc_nvlist_dst_size size of config nvlist
1766 */
1767 static int
1768 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1769 {
1770 nvlist_t *config;
1771 int error;
1772 int ret = 0;
1773
1774 error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1775 sizeof (zc->zc_value));
1776
1777 if (config != NULL) {
1778 ret = put_nvlist(zc, config);
1779 nvlist_free(config);
1780
1781 /*
1782 * The config may be present even if 'error' is non-zero.
1783 * In this case we return success, and preserve the real errno
1784 * in 'zc_cookie'.
1785 */
1786 zc->zc_cookie = error;
1787 } else {
1788 ret = error;
1789 }
1790
1791 return (ret);
1792 }
1793
1794 /*
1795 * Try to import the given pool, returning pool stats as appropriate so that
 * userland knows which devices are available and overall pool health.
1797 */
1798 static int
1799 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1800 {
1801 nvlist_t *tryconfig, *config;
1802 int error;
1803
1804 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1805 zc->zc_iflags, &tryconfig)) != 0)
1806 return (error);
1807
1808 config = spa_tryimport(tryconfig);
1809
1810 nvlist_free(tryconfig);
1811
1812 if (config == NULL)
1813 return (SET_ERROR(EINVAL));
1814
1815 error = put_nvlist(zc, config);
1816 nvlist_free(config);
1817
1818 return (error);
1819 }
1820
1821 /*
1822 * inputs:
1823 * zc_name name of the pool
1824 * zc_cookie scan func (pool_scan_func_t)
1825 * zc_flags scrub pause/resume flag (pool_scrub_cmd_t)
1826 */
1827 static int
1828 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1829 {
1830 spa_t *spa;
1831 int error;
1832
	if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
		return (SET_ERROR(EINVAL));

	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
		return (error);
1838
1839 if (zc->zc_flags == POOL_SCRUB_PAUSE)
1840 error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
1841 else if (zc->zc_cookie == POOL_SCAN_NONE)
1842 error = spa_scan_stop(spa);
1843 else
1844 error = spa_scan(spa, zc->zc_cookie);
1845
1846 spa_close(spa, FTAG);
1847
1848 return (error);
1849 }
1850
1851 /*
1852 * inputs:
1853 * zc_name name of the pool
1854 * zc_cookie trim_cmd_info_t
1855 */
1856 static int
1857 zfs_ioc_pool_trim(zfs_cmd_t *zc)
1858 {
1859 spa_t *spa;
1860 int error;
1861 trim_cmd_info_t tci;
1862
1863 if (ddi_copyin((void *)(uintptr_t)zc->zc_cookie, &tci,
1864 sizeof (tci), 0) == -1)
		return (SET_ERROR(EFAULT));
1866
1867 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1868 return (error);
1869
1870 if (tci.tci_start) {
1871 spa_man_trim(spa, tci.tci_rate);
1872 } else {
1873 spa_man_trim_stop(spa);
1874 }
1875
1876 spa_close(spa, FTAG);
1877
1878 return (error);
1879 }
1880
1881 static int
1882 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1883 {
1884 spa_t *spa;
1885 int error;
1886
1887 error = spa_open(zc->zc_name, &spa, FTAG);
1888 if (error == 0) {
1889 spa_freeze(spa);
1890 spa_close(spa, FTAG);
1891 }
1892 return (error);
1893 }
1894
1895 static int
1896 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1897 {
1898 spa_t *spa;
1899 int error;
1900
1901 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1902 return (error);
1903
1904 if (zc->zc_cookie < spa_version(spa) ||
1905 !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1906 spa_close(spa, FTAG);
1907 return (SET_ERROR(EINVAL));
1908 }
1909
1910 spa_upgrade(spa, zc->zc_cookie);
1911 spa_close(spa, FTAG);
1912
1913 return (error);
1914 }
1915
1916 static int
1917 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1918 {
1919 spa_t *spa;
1920 char *hist_buf;
1921 uint64_t size;
1922 int error;
1923
1924 if ((size = zc->zc_history_len) == 0)
1925 return (SET_ERROR(EINVAL));
1926
1927 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1928 return (error);
1929
1930 if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1931 spa_close(spa, FTAG);
1932 return (SET_ERROR(ENOTSUP));
1933 }
1934
1935 hist_buf = kmem_alloc(size, KM_SLEEP);
1936 if ((error = spa_history_get(spa, &zc->zc_history_offset,
1937 &zc->zc_history_len, hist_buf)) == 0) {
1938 error = ddi_copyout(hist_buf,
1939 (void *)(uintptr_t)zc->zc_history,
1940 zc->zc_history_len, zc->zc_iflags);
1941 }
1942
1943 spa_close(spa, FTAG);
1944 kmem_free(hist_buf, size);
1945 return (error);
1946 }
1947
1948 static int
1949 zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1950 {
1951 spa_t *spa;
1952 int error;
1953
1954 error = spa_open(zc->zc_name, &spa, FTAG);
1955 if (error == 0) {
1956 error = spa_change_guid(spa);
1957 spa_close(spa, FTAG);
1958 }
1959 return (error);
1960 }
1961
1962 static int
1963 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1964 {
1965 return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
1966 }
1967
1968 /*
1969 * inputs:
1970 * zc_name name of filesystem
1971 * zc_obj object to find
1972 *
1973 * outputs:
1974 * zc_value name of object
1975 */
1976 static int
1977 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1978 {
1979 objset_t *os;
1980 int error;
1981
1982 /* XXX reading from objset not owned */
1983 if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1984 return (error);
1985 if (dmu_objset_type(os) != DMU_OST_ZFS) {
1986 dmu_objset_rele(os, FTAG);
1987 return (SET_ERROR(EINVAL));
1988 }
1989 error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1990 sizeof (zc->zc_value));
1991 dmu_objset_rele(os, FTAG);
1992
1993 return (error);
1994 }
1995
1996 /*
1997 * inputs:
1998 * zc_name name of filesystem
1999 * zc_obj object to find
2000 *
2001 * outputs:
2002 * zc_stat stats on object
2003 * zc_value path to object
2004 */
2005 static int
2006 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
2007 {
2008 objset_t *os;
2009 int error;
2010
2011 /* XXX reading from objset not owned */
2012 if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
2013 return (error);
2014 if (dmu_objset_type(os) != DMU_OST_ZFS) {
2015 dmu_objset_rele(os, FTAG);
2016 return (SET_ERROR(EINVAL));
2017 }
2018 error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
2019 sizeof (zc->zc_value));
2020 dmu_objset_rele(os, FTAG);
2021
2022 return (error);
2023 }
2024
2025 static int
2026 zfs_ioc_vdev_add(zfs_cmd_t *zc)
2027 {
2028 spa_t *spa;
2029 int error;
2030 nvlist_t *config, **l2cache, **spares;
2031 uint_t nl2cache = 0, nspares = 0;
2032 nvlist_t *event;
2033
2034 error = spa_open(zc->zc_name, &spa, FTAG);
2035 if (error != 0)
2036 return (error);
2037
2038 error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2039 zc->zc_iflags, &config);
2040 (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
2041 &l2cache, &nl2cache);
2042
2043 (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
2044 &spares, &nspares);
2045
	/*
	 * A root pool with concatenated devices is not supported.
	 * Thus, a device can not be added to a root pool.
	 *
	 * An intent log device can not be added to a root pool, because
	 * the ZIL is replayed during mountroot and a separate log device
	 * cannot be accessed at that time.
	 *
	 * l2cache and spare devices are ok to be added to a root pool.
	 */
2056 if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
2057 nvlist_free(config);
2058 spa_close(spa, FTAG);
2059 return (SET_ERROR(EDOM));
2060 }
2061
2062 if (error == 0) {
2063 error = spa_vdev_add(spa, config);
2064 if (error == 0) {
2065 event = fnvlist_alloc();
2066 fnvlist_add_string(event, "pool", zc->zc_name);
2067 fnvlist_add_nvlist(event, "config", config);
2068 zfs_event_post(ZPOOL_EC_STATUS, "add", event);
2069
2070 }
2071 nvlist_free(config);
2072 }
2073 spa_close(spa, FTAG);
2074 return (error);
2075 }
2076
2077 /*
2078 * inputs:
2079 * zc_name name of the pool
 * zc_guid		guid of the vdev to remove
2082 */
2083 static int
2084 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
2085 {
2086 spa_t *spa;
2087 int error;
2088 nvlist_t *event;
2089
2090 error = spa_open(zc->zc_name, &spa, FTAG);
2091 if (error != 0)
2092 return (error);
2093 error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
2094 if (error == 0) {
2095 event = fnvlist_alloc();
2096 fnvlist_add_string(event, "pool", zc->zc_name);
2097 fnvlist_add_uint64(event, "guid", zc->zc_guid);
2098 zfs_event_post(ZPOOL_EC_STATUS, "remove", event);
2099 }
2100
2101 spa_close(spa, FTAG);
2102 return (error);
2103 }
2104
2105 static int
2106 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
2107 {
2108 spa_t *spa;
2109 int error;
2110 vdev_state_t newstate = VDEV_STATE_UNKNOWN;
2111
2112 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2113 return (error);
2114 switch (zc->zc_cookie) {
2115 case VDEV_STATE_ONLINE:
2116 error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
2117 break;
2118
2119 case VDEV_STATE_OFFLINE:
2120 error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
2121 break;
2122
2123 case VDEV_STATE_FAULTED:
2124 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
2125 zc->zc_obj != VDEV_AUX_EXTERNAL &&
2126 zc->zc_obj != VDEV_AUX_OPEN_FAILED)
2127 zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
2128
2129 error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
2130 break;
2131
2132 case VDEV_STATE_DEGRADED:
2133 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
2134 zc->zc_obj != VDEV_AUX_EXTERNAL)
2135 zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
2136
2137 error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
2138 break;
2139
2140 default:
2141 error = SET_ERROR(EINVAL);
2142 }
2143 zc->zc_cookie = newstate;
2144 spa_close(spa, FTAG);
2145 return (error);
2146 }
2147
2148 static int
2149 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
2150 {
2151 spa_t *spa;
2152 int replacing = zc->zc_cookie;
2153 nvlist_t *config;
2154 nvlist_t *event;
2155 int error;
2156
2157 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2158 return (error);
2159
2160 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2161 zc->zc_iflags, &config)) == 0) {
2162 error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
2163 if (error == 0) {
2164 event = fnvlist_alloc();
2165 fnvlist_add_string(event, "pool", zc->zc_name);
2166 fnvlist_add_nvlist(event, "config", config);
2167 fnvlist_add_int32(event, "replacing", replacing);
2168 zfs_event_post(ZPOOL_EC_STATUS, "attach", event);
2169 }
2170 nvlist_free(config);
2171 }
2172
2173 spa_close(spa, FTAG);
2174 return (error);
2175 }
2176
2177 static int
2178 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
2179 {
2180 spa_t *spa;
2181 int error;
2182 nvlist_t *event;
2183
2184 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2185 return (error);
2186
2187 error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
2188 if (error == 0) {
2189 event = fnvlist_alloc();
2190 fnvlist_add_string(event, "pool", zc->zc_name);
2191 fnvlist_add_uint64(event, "guid", zc->zc_guid);
2192 zfs_event_post(ZPOOL_EC_STATUS, "detach", event);
2193 }
2194 spa_close(spa, FTAG);
2195 return (error);
2196 }
2197
2198 static int
2199 zfs_ioc_vdev_split(zfs_cmd_t *zc)
2200 {
2201 spa_t *spa;
2202 nvlist_t *config, *props = NULL;
2203 int error;
2204 boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
2205
2206 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2207 return (error);
2208
2209 if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2210 zc->zc_iflags, &config)) {
2211 spa_close(spa, FTAG);
2212 return (error);
2213 }
2214
2215 if (zc->zc_nvlist_src_size != 0 && (error =
2216 get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2217 zc->zc_iflags, &props))) {
2218 spa_close(spa, FTAG);
2219 nvlist_free(config);
2220 return (error);
2221 }
2222
2223 error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
2224
2225 spa_close(spa, FTAG);
2226
2227 nvlist_free(config);
2228 nvlist_free(props);
2229
2230 return (error);
2231 }
2232
2233 static int
2234 zfs_ioc_vdev_setl2adddt(zfs_cmd_t *zc)
2235 {
2236 spa_t *spa;
2237 int error;
2238 uint64_t guid = zc->zc_guid;
2239 char *l2ad_ddt = zc->zc_value;
2240
2241 error = spa_open(zc->zc_name, &spa, FTAG);
2242 if (error != 0)
2243 return (error);
2244
2245 error = spa_vdev_setl2adddt(spa, guid, l2ad_ddt);
2246 spa_close(spa, FTAG);
2247 return (error);
2248 }
2249
2250
2251 static int
2252 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
2253 {
2254 spa_t *spa;
2255 char *path = zc->zc_value;
2256 uint64_t guid = zc->zc_guid;
2257 int error;
2258
2259 error = spa_open(zc->zc_name, &spa, FTAG);
2260 if (error != 0)
2261 return (error);
2262
2263 error = spa_vdev_setpath(spa, guid, path);
2264 spa_close(spa, FTAG);
2265 return (error);
2266 }
2267
2268 static int
2269 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
2270 {
2271 spa_t *spa;
2272 char *fru = zc->zc_value;
2273 uint64_t guid = zc->zc_guid;
2274 int error;
2275
2276 error = spa_open(zc->zc_name, &spa, FTAG);
2277 if (error != 0)
2278 return (error);
2279
2280 error = spa_vdev_setfru(spa, guid, fru);
2281 spa_close(spa, FTAG);
2282 return (error);
2283 }
2284
2285 static int
2286 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
2287 {
2288 int error = 0;
2289 nvlist_t *nv;
2290
2291 dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2292
2293 if (zc->zc_nvlist_dst != 0 &&
2294 (error = dsl_prop_get_all(os, &nv)) == 0) {
2295 dmu_objset_stats(os, nv);
2296 /*
2297 * NB: zvol_get_stats() will read the objset contents,
2298 * which we aren't supposed to do with a
2299 * DS_MODE_USER hold, because it could be
2300 * inconsistent. So this is a bit of a workaround...
		 * XXX reading without owning
2302 */
2303 if (!zc->zc_objset_stats.dds_inconsistent &&
2304 dmu_objset_type(os) == DMU_OST_ZVOL) {
2305 error = zvol_get_stats(os, nv);
2306 if (error == EIO)
2307 return (error);
2308 VERIFY0(error);
2309 }
2310 error = put_nvlist(zc, nv);
2311 nvlist_free(nv);
2312 }
2313
2314 return (error);
2315 }
2316
2317 /*
2318 * inputs:
2319 * zc_name name of filesystem
2320 * zc_nvlist_dst_size size of buffer for property nvlist
2321 *
2322 * outputs:
2323 * zc_objset_stats stats
2324 * zc_nvlist_dst property nvlist
2325 * zc_nvlist_dst_size size of property nvlist
2326 */
2327 static int
2328 zfs_ioc_objset_stats(zfs_cmd_t *zc)
2329 {
2330 objset_t *os = NULL;
2331 int error;
2332
2333 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2334 if (error == 0) {
2335 error = zfs_ioc_objset_stats_impl(zc, os);
2336 dmu_objset_rele(os, FTAG);
2337 }
2338
2339 return (error);
2340 }
2341
2342 /*
2343 * inputs:
2344 * zc_name name of filesystem
2345 * zc_nvlist_dst_size size of buffer for property nvlist
2346 *
2347 * outputs:
2348 * zc_nvlist_dst received property nvlist
2349 * zc_nvlist_dst_size size of received property nvlist
2350 *
2351 * Gets received properties (distinct from local properties on or after
2352 * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2353 * local property values.
2354 */
2355 static int
2356 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2357 {
2358 int error = 0;
2359 nvlist_t *nv;
2360
2361 /*
2362 * Without this check, we would return local property values if the
2363 * caller has not already received properties on or after
2364 * SPA_VERSION_RECVD_PROPS.
2365 */
2366 if (!dsl_prop_get_hasrecvd(zc->zc_name))
2367 return (SET_ERROR(ENOTSUP));
2368
2369 if (zc->zc_nvlist_dst != 0 &&
2370 (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2371 error = put_nvlist(zc, nv);
2372 nvlist_free(nv);
2373 }
2374
2375 return (error);
2376 }
2377
2378 static int
2379 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2380 {
2381 uint64_t value;
2382 int error;
2383
2384 /*
2385 * zfs_get_zplprop() will either find a value or give us
2386 * the default value (if there is one).
2387 */
2388 if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2389 return (error);
2390 VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
2391 return (0);
2392 }
2393
2394 /*
2395 * inputs:
2396 * zc_name name of filesystem
2397 * zc_nvlist_dst_size size of buffer for zpl property nvlist
2398 *
2399 * outputs:
2400 * zc_nvlist_dst zpl property nvlist
2401 * zc_nvlist_dst_size size of zpl property nvlist
2402 */
2403 static int
2404 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2405 {
2406 objset_t *os;
2407 int err;
2408
2409 /* XXX reading without owning */
2410 if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
2411 return (err);
2412
2413 dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2414
2415 /*
2416 * NB: nvl_add_zplprop() will read the objset contents,
2417 * which we aren't supposed to do with a DS_MODE_USER
2418 * hold, because it could be inconsistent.
2419 */
2420 if (zc->zc_nvlist_dst != NULL &&
2421 !zc->zc_objset_stats.dds_inconsistent &&
2422 dmu_objset_type(os) == DMU_OST_ZFS) {
2423 nvlist_t *nv;
2424
2425 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2426 if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2427 (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2428 (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2429 (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
2430 err = put_nvlist(zc, nv);
2431 nvlist_free(nv);
2432 } else {
2433 err = SET_ERROR(ENOENT);
2434 }
2435 dmu_objset_rele(os, FTAG);
2436 return (err);
2437 }
2438
2439 /*
2440 * inputs:
2441 * zc_name name of filesystem
2442 * zc_cookie zap cursor
2443 * zc_nvlist_dst_size size of buffer for property nvlist
2444 *
2445 * outputs:
2446 * zc_name name of next filesystem
2447 * zc_cookie zap cursor
2448 * zc_objset_stats stats
2449 * zc_nvlist_dst property nvlist
2450 * zc_nvlist_dst_size size of property nvlist
2451 */
2452 static int
2453 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2454 {
2455 objset_t *os;
2456 int error;
2457 char *p;
2458 size_t orig_len = strlen(zc->zc_name);
2459
2460 top:
2461 if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
2462 if (error == ENOENT)
2463 error = SET_ERROR(ESRCH);
2464 return (error);
2465 }
2466
2467 p = strrchr(zc->zc_name, '/');
2468 if (p == NULL || p[1] != '\0')
2469 (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2470 p = zc->zc_name + strlen(zc->zc_name);
2471
2472 do {
2473 error = dmu_dir_list_next(os,
2474 sizeof (zc->zc_name) - (p - zc->zc_name), p,
2475 NULL, &zc->zc_cookie);
2476 if (error == ENOENT)
2477 error = SET_ERROR(ESRCH);
2478 } while (error == 0 && dataset_name_hidden(zc->zc_name));
2479 dmu_objset_rele(os, FTAG);
2480
2481 /*
	 * If it's an internal dataset (i.e. one with a '$' in its name),
2483 * don't try to get stats for it, otherwise we'll return ENOENT.
2484 */
2485 if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2486 error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2487 if (error == ENOENT) {
2488 /* We lost a race with destroy, get the next one. */
2489 zc->zc_name[orig_len] = '\0';
2490 goto top;
2491 }
2492 }
2493 return (error);
2494 }
2495
2496 /*
2497 * inputs:
2498 * zc_name name of filesystem
2499 * zc_cookie zap cursor
2500 * zc_nvlist_dst_size size of buffer for property nvlist
2501 * zc_simple when set, only name is requested
2502 *
2503 * outputs:
2504 * zc_name name of next snapshot
2505 * zc_objset_stats stats
2506 * zc_nvlist_dst property nvlist
2507 * zc_nvlist_dst_size size of property nvlist
2508 */
2509 static int
2510 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2511 {
2512 objset_t *os;
2513 int error;
2514
2515 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2516 if (error != 0) {
2517 return (error == ENOENT ? ESRCH : error);
2518 }
2519
2520 /*
2521 * A dataset name of maximum length cannot have any snapshots,
2522 * so exit immediately.
2523 */
2524 if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
2525 ZFS_MAX_DATASET_NAME_LEN) {
2526 dmu_objset_rele(os, FTAG);
2527 return (SET_ERROR(ESRCH));
2528 }
2529
2530 error = dmu_snapshot_list_next(os,
2531 sizeof (zc->zc_name) - strlen(zc->zc_name),
2532 zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2533 NULL);
2534
2535 if (error == 0 && !zc->zc_simple) {
2536 dsl_dataset_t *ds;
2537 dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2538
2539 error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2540 if (error == 0) {
2541 objset_t *ossnap;
2542
2543 error = dmu_objset_from_ds(ds, &ossnap);
2544 if (error == 0)
2545 error = zfs_ioc_objset_stats_impl(zc, ossnap);
2546 dsl_dataset_rele(ds, FTAG);
2547 }
2548 } else if (error == ENOENT) {
2549 error = SET_ERROR(ESRCH);
2550 }
2551
2552 dmu_objset_rele(os, FTAG);
2553 /* if we failed, undo the @ that we tacked on to zc_name */
2554 if (error != 0)
2555 *strchr(zc->zc_name, '@') = '\0';
2556 return (error);
2557 }
2558
2559 static int
2560 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2561 {
2562 const char *propname = nvpair_name(pair);
2563 uint64_t *valary;
2564 unsigned int vallen;
2565 const char *domain;
2566 char *dash;
2567 zfs_userquota_prop_t type;
2568 uint64_t rid;
2569 uint64_t quota;
2570 zfsvfs_t *zfsvfs;
2571 int err;
2572
2573 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2574 nvlist_t *attrs;
2575 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2576 if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2577 &pair) != 0)
2578 return (SET_ERROR(EINVAL));
2579 }
2580
2581 /*
2582 * A correctly constructed propname is encoded as
2583 * userquota@<rid>-<domain>.
2584 */
2585 if ((dash = strchr(propname, '-')) == NULL ||
2586 nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2587 vallen != 3)
2588 return (SET_ERROR(EINVAL));
2589
2590 domain = dash + 1;
2591 type = valary[0];
2592 rid = valary[1];
2593 quota = valary[2];
2594
2595 err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2596 if (err == 0) {
2597 err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2598 zfsvfs_rele(zfsvfs, FTAG);
2599 }
2600
2601 return (err);
2602 }
2603
2604 /*
2605 * If the named property is one that has a special function to set its value,
2606 * return 0 on success and a positive error code on failure; otherwise if it is
2607 * not one of the special properties handled by this function, return -1.
2608 *
2609 * XXX: It would be better for callers of the property interface if we handled
2610 * these special cases in dsl_prop.c (in the dsl layer).
2611 */
2612 static int
2613 zfs_prop_set_special(const char *dsname, zprop_source_t source,
2614 nvpair_t *pair)
2615 {
2616 const char *propname = nvpair_name(pair);
2617 zfs_prop_t prop = zfs_name_to_prop(propname);
2618 uint64_t intval;
2619 int err = -1;
2620
2621 if (prop == ZPROP_INVAL) {
2622 if (zfs_prop_userquota(propname))
2623 return (zfs_prop_set_userquota(dsname, pair));
2624 return (-1);
2625 }
2626
2627 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2628 nvlist_t *attrs;
2629 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2630 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2631 &pair) == 0);
2632 }
2633
2634 if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
2635 return (-1);
2636
2637 VERIFY(0 == nvpair_value_uint64(pair, &intval));
2638
2639 switch (prop) {
2640 case ZFS_PROP_QUOTA:
2641 err = dsl_dir_set_quota(dsname, source, intval);
2642 break;
2643 case ZFS_PROP_REFQUOTA:
2644 err = dsl_dataset_set_refquota(dsname, source, intval);
2645 break;
2646 case ZFS_PROP_FILESYSTEM_LIMIT:
2647 case ZFS_PROP_SNAPSHOT_LIMIT:
2648 if (intval == UINT64_MAX) {
2649 /* clearing the limit, just do it */
2650 err = 0;
2651 } else {
2652 err = dsl_dir_activate_fs_ss_limit(dsname);
2653 }
2654 /*
2655 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2656 * default path to set the value in the nvlist.
2657 */
2658 if (err == 0)
2659 err = -1;
2660 break;
2661 case ZFS_PROP_RESERVATION:
2662 err = dsl_dir_set_reservation(dsname, source, intval);
2663 break;
2664 case ZFS_PROP_REFRESERVATION:
2665 err = dsl_dataset_set_refreservation(dsname, source, intval);
2666 break;
2667 case ZFS_PROP_VOLSIZE:
2668 err = zvol_set_volsize(dsname, intval);
2669 break;
2670 case ZFS_PROP_VERSION:
2671 {
2672 zfsvfs_t *zfsvfs;
2673
2674 if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2675 break;
2676
2677 err = zfs_set_version(zfsvfs, intval);
2678 zfsvfs_rele(zfsvfs, FTAG);
2679
2680 if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2681 zfs_cmd_t *zc;
2682
2683 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2684 (void) strcpy(zc->zc_name, dsname);
2685 (void) zfs_ioc_userspace_upgrade(zc);
2686 kmem_free(zc, sizeof (zfs_cmd_t));
2687 }
2688 break;
2689 }
2690 default:
2691 err = -1;
2692 }
2693
2694 return (err);
2695 }
2696
2697 /*
2698 * This function is best effort. If it fails to set any of the given properties,
2699 * it continues to set as many as it can and returns the last error
2700 * encountered. If the caller provides a non-NULL errlist, it will be filled in
2701 * with the list of names of all the properties that failed along with the
2702 * corresponding error numbers.
2703 *
2704 * If every property is set successfully, zero is returned and errlist is not
2705 * modified.
2706 */
2707 int
2708 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2709 nvlist_t *errlist)
2710 {
2711 spa_t *spa = NULL;
2712 nvpair_t *pair;
2713 nvpair_t *propval;
2714 int rv = 0;
2715 uint64_t intval;
2716 char *strval;
2717 nvlist_t *genericnvl = fnvlist_alloc();
2718 nvlist_t *retrynvl = fnvlist_alloc();
2719 zfsvfs_t *zfsvfs;
2720 boolean_t set_worm = B_FALSE;
2721 boolean_t set_wbc_mode = B_FALSE;
2722 boolean_t wbc_walk_locked = B_FALSE;
2723 boolean_t set_dedup = B_FALSE;
2724
2725 if ((rv = spa_open(dsname, &spa, FTAG)) != 0)
2726 return (rv);
2727
2728 retry:
2729 pair = NULL;
2730 while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2731 const char *propname = nvpair_name(pair);
2732 zfs_prop_t prop = zfs_name_to_prop(propname);
2733 int err = 0;
2734
2735 if (!set_worm && (strcmp(propname, "nms:worm") == 0)) {
2736 set_worm = B_TRUE;
2737 }
2738
2739 /*
2740 * If 'wbc_mode' is going to be changed, then we need to
2741 * do some actions before 'set'
2742 */
2743 if (prop == ZFS_PROP_WBC_MODE)
2744 set_wbc_mode = B_TRUE;
2745
2746 /*
2747 *
2748 */
2749 if (prop == ZFS_PROP_DEDUP)
2750 set_dedup = B_TRUE;
2751
2752 /* decode the property value */
2753 propval = pair;
2754 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2755 nvlist_t *attrs;
2756 attrs = fnvpair_value_nvlist(pair);
2757 if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2758 &propval) != 0)
2759 err = SET_ERROR(EINVAL);
2760 }
2761
2762 /* Validate value type */
2763 if (err == 0 && prop == ZPROP_INVAL) {
2764 if (zfs_prop_user(propname)) {
2765 if (nvpair_type(propval) != DATA_TYPE_STRING)
2766 err = SET_ERROR(EINVAL);
2767 } else if (zfs_prop_userquota(propname)) {
2768 if (nvpair_type(propval) !=
2769 DATA_TYPE_UINT64_ARRAY)
2770 err = SET_ERROR(EINVAL);
2771 } else {
2772 err = SET_ERROR(EINVAL);
2773 }
2774 } else if (err == 0) {
2775 if (nvpair_type(propval) == DATA_TYPE_STRING) {
2776 if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2777 err = SET_ERROR(EINVAL);
2778 } else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2779 const char *unused;
2780
2781 intval = fnvpair_value_uint64(propval);
2782
2783 switch (zfs_prop_get_type(prop)) {
2784 case PROP_TYPE_NUMBER:
2785 break;
2786 case PROP_TYPE_STRING:
2787 err = SET_ERROR(EINVAL);
2788 break;
2789 case PROP_TYPE_INDEX:
2790 if (zfs_prop_index_to_string(prop,
2791 intval, &unused) != 0)
2792 err = SET_ERROR(EINVAL);
2793 break;
2794 default:
2795 cmn_err(CE_PANIC,
2796 "unknown property type");
2797 }
2798 } else {
2799 err = SET_ERROR(EINVAL);
2800 }
2801 }
2802
2803 /* Validate permissions */
2804 if (err == 0)
2805 err = zfs_check_settable(dsname, pair, CRED());
2806
2807 if (err == 0) {
2808 err = zfs_prop_set_special(dsname, source, pair);
2809 if (err == -1) {
2810 /*
2811 * For better performance we build up a list of
2812 * properties to set in a single transaction.
2813 */
2814 err = nvlist_add_nvpair(genericnvl, pair);
2815 } else if (err != 0 && nvl != retrynvl) {
2816 /*
2817 * This may be a spurious error caused by
2818 * receiving quota and reservation out of order.
2819 * Try again in a second pass.
2820 */
2821 err = nvlist_add_nvpair(retrynvl, pair);
2822 }
2823 }
2824
2825 if (err != 0) {
2826 if (errlist != NULL)
2827 fnvlist_add_int32(errlist, propname, err);
2828 rv = err;
2829 }
2830 }
2831
2832 if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2833 nvl = retrynvl;
2834 goto retry;
2835 }
2836
2837 /*
2838 * Deduplication and WBC cannot be used together
2839 * This code returns error also for case when
2840 * WBC is ON, DEDUP is off and a user tries
2841 * to do DEDUP=off, because in this case the code
2842 * will be more complex, but benefit is too small
2843 */
2844 if (set_wbc_mode && set_dedup) {
2845 nvlist_free(genericnvl);
2846 nvlist_free(retrynvl);
2847 spa_close(spa, FTAG);
2848
2849 return (SET_ERROR(EKZFS_WBCCONFLICT));
2850 }
2851
2852 /*
2853 * Additional actions before set wbc_mode:
2854 * - first need to try to lock WBC-walking, to stop migration and
2855 * avoid the openning of new migration window
2856 * - second step (from sync-context): if migration window
2857 * is active it will be purged, to correctly add/remove WBC-instance
2858 */
2859 if (set_wbc_mode && wbc_walk_lock(spa) == 0)
2860 wbc_walk_locked = B_TRUE;
2861
2862 if (!nvlist_empty(genericnvl) &&
2863 dsl_props_set(dsname, source, genericnvl) != 0) {
2864 /*
2865 * If this fails, we still want to set as many properties as we
2866 * can, so try setting them individually.
2867 */
2868 pair = NULL;
2869 while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2870 const char *propname = nvpair_name(pair);
2871 int err = 0;
2872
2873 propval = pair;
2874 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2875 nvlist_t *attrs;
2876 attrs = fnvpair_value_nvlist(pair);
2877 propval = fnvlist_lookup_nvpair(attrs,
2878 ZPROP_VALUE);
2879 }
2880
2881 if (nvpair_type(propval) == DATA_TYPE_STRING) {
2882 strval = fnvpair_value_string(propval);
2883 err = dsl_prop_set_string(dsname, propname,
2884 source, strval);
2885 } else {
2886 intval = fnvpair_value_uint64(propval);
2887 err = dsl_prop_set_int(dsname, propname, source,
2888 intval);
2889 }
2890
2891 if (err != 0) {
2892 if (errlist != NULL) {
2893 fnvlist_add_int32(errlist, propname,
2894 err);
2895 }
2896 rv = err;
2897 }
2898 }
2899 }
2900 nvlist_free(genericnvl);
2901 nvlist_free(retrynvl);
2902
2903 if (wbc_walk_locked)
2904 wbc_walk_unlock(spa);
2905
2906 if (set_worm && getzfsvfs(dsname, &zfsvfs) == 0) {
2907 if (zfs_is_wormed(dsname)) {
2908 zfsvfs->z_isworm = B_TRUE;
2909 } else {
2910 zfsvfs->z_isworm = B_FALSE;
2911 }
2912 VFS_RELE(zfsvfs->z_vfs);
2913 }
2914
2915 if (rv == 0)
2916 autosnap_force_snap_by_name(dsname, NULL, B_FALSE);
2917
2918 spa_close(spa, FTAG);
2919
2920 return (rv);
2921 }
2922
2923 /*
2924 * Check that all the properties are valid user properties.
2925 */
2926 static int
2927 zfs_check_userprops(const char *fsname, nvlist_t *nvl)
2928 {
2929 nvpair_t *pair = NULL;
2930 int error = 0;
2931
2932 while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2933 const char *propname = nvpair_name(pair);
2934
2935 if (!zfs_prop_user(propname) ||
2936 nvpair_type(pair) != DATA_TYPE_STRING)
2937 return (SET_ERROR(EINVAL));
2938
2939 if (error = zfs_secpolicy_write_perms(fsname,
2940 ZFS_DELEG_PERM_USERPROP, CRED()))
2941 return (error);
2942
2943 if (strlen(propname) >= ZAP_MAXNAMELEN)
2944 return (SET_ERROR(ENAMETOOLONG));
2945
2946 if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
2947 return (SET_ERROR(E2BIG));
2948 }
2949 return (0);
2950 }
2951
2952 static void
2953 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2954 {
2955 nvpair_t *pair;
2956
2957 VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2958
2959 pair = NULL;
2960 while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2961 if (nvlist_exists(skipped, nvpair_name(pair)))
2962 continue;
2963
2964 VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2965 }
2966 }
2967
2968 static int
2969 clear_received_props(const char *dsname, nvlist_t *props,
2970 nvlist_t *skipped)
2971 {
2972 int err = 0;
2973 nvlist_t *cleared_props = NULL;
2974 props_skip(props, skipped, &cleared_props);
2975 if (!nvlist_empty(cleared_props)) {
2976 /*
2977 * Acts on local properties until the dataset has received
2978 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2979 */
2980 zprop_source_t flags = (ZPROP_SRC_NONE |
2981 (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
2982 err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
2983 }
2984 nvlist_free(cleared_props);
2985 return (err);
2986 }
2987
2988 int
2989 zfs_ioc_set_prop_impl(char *name, nvlist_t *props,
2990 boolean_t received, nvlist_t **out_errors)
2991 {
2992 int error = 0;
2993 nvlist_t *errors, *event;
2994 zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2995 ZPROP_SRC_LOCAL);
2996
2997 ASSERT(props != NULL);
2998
2999 if (received) {
3000 nvlist_t *origprops;
3001
3002 if (dsl_prop_get_received(name, &origprops) == 0) {
3003 (void) clear_received_props(name, origprops, props);
3004 nvlist_free(origprops);
3005 }
3006
3007 error = dsl_prop_set_hasrecvd(name);
3008 }
3009
3010 errors = fnvlist_alloc();
3011 if (error == 0)
3012 error = zfs_set_prop_nvlist(name, source, props, errors);
3013
3014 event = fnvlist_alloc();
3015 fnvlist_add_string(event, "fsname", name);
3016 fnvlist_add_nvlist(event, "properties", props);
3017 fnvlist_add_nvlist(event, "errors", errors);
3018 zfs_event_post(ZFS_EC_STATUS, "set", event);
3019
3020 if (out_errors != NULL)
3021 *out_errors = fnvlist_dup(errors);
3022
3023 fnvlist_free(errors);
3024
3025 return (error);
3026 }
3027
3028 /*
 * XXX This functionality will be removed once equivalent
 * functionality based on ZFS channel programs is integrated.
 * The channel program implementation is being developed
 * by Delphix.
 *
 * This function sets the provided props on the provided datasets
 * in one sync round. There are some requirements:
 * - all datasets must belong to the same pool
 * - only user properties may be set
 *
 * This function does all or nothing.
3040 *
3041 * inputs:
3042 * zc_nvlist_src{_size} nvlist of datasets and properties to apply
3043 *
3044 * outputs:
3045 * zc_nvlist_dst{_size} error for each unapplied property
3046 */
3047 /* ARGSUSED */
3048 static int
3049 zfs_ioc_set_prop_mds(const char *pool_name, nvlist_t *dss_props,
3050 nvlist_t *outnvl)
3051 {
3052 int error = 0;
3053 spa_t *spa = NULL;
3054 nvpair_t *pair = NULL;
3055 size_t pool_name_len;
3056 size_t total_num_props = 0;
3057
3058 ASSERT(dss_props != NULL);
3059
3060 if (nvlist_empty(dss_props))
3061 return (SET_ERROR(ENODATA));
3062
3063 pool_name_len = strlen(pool_name);
3064 while ((pair = nvlist_next_nvpair(dss_props, pair)) != NULL) {
3065 nvlist_t *props;
3066 nvpair_t *prop_nvp = NULL;
3067 const char *ds_name;
3068
3069 ds_name = nvpair_name(pair);
		if (strncmp(pool_name, ds_name, pool_name_len) != 0 ||
		    (ds_name[pool_name_len] != '\0' &&
		    ds_name[pool_name_len] != '/' &&
		    ds_name[pool_name_len] != '@'))
			return (SET_ERROR(EXDEV));
3075
3076 if (nvpair_type(pair) != DATA_TYPE_NVLIST)
3077 return (SET_ERROR(EINVAL));
3078
3079 props = fnvpair_value_nvlist(pair);
3080 while ((prop_nvp = nvlist_next_nvpair(props,
3081 prop_nvp)) != NULL) {
3082 const char *propname = nvpair_name(prop_nvp);
3083 /* Only user-props */
3084 if (!zfs_prop_user(propname) ||
3085 nvpair_type(prop_nvp) != DATA_TYPE_STRING)
3086 return (SET_ERROR(EINVAL));
3087
3088 /*
3089 * We count the number to use it
3090 * later to check for ENOSPC
3091 */
3092 total_num_props++;
3093 }
3094 }
3095
3096 if ((error = spa_open(pool_name, &spa, FTAG)) != 0)
3097 return (error);
3098
3099 error = dsl_props_set_mds(pool_name, dss_props, total_num_props);
3100 spa_close(spa, FTAG);
3101 if (error == 0) {
3102 nvlist_t *event = fnvlist_alloc();
3103 fnvlist_add_nvlist(event, "properties", dss_props);
3104 zfs_event_post(ZFS_EC_STATUS, "set-mds", event);
3105 }
3106
3107 return (error);
3108 }
3109
3110 /*
3111 * inputs:
3112 * zc_name name of filesystem
3113 * zc_value name of property to set
3114 * zc_nvlist_src{_size} nvlist of properties to apply
3115 * zc_cookie received properties flag
3116 *
3117 * outputs:
3118 * zc_nvlist_dst{_size} error for each unapplied received property
3119 */
3120 static int
3121 zfs_ioc_set_prop(zfs_cmd_t *zc)
3122 {
3123 nvlist_t *nvl;
3124 boolean_t received = zc->zc_cookie;
3125 nvlist_t *errors = NULL;
3126 int error;
3127
3128 if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3129 zc->zc_iflags, &nvl)) != 0)
3130 return (error);
3131
3132 error = zfs_ioc_set_prop_impl(zc->zc_name, nvl, received, &errors);
3133
3134 if (zc->zc_nvlist_dst != NULL && errors != NULL) {
3135 (void) put_nvlist(zc, errors);
3136 }
3137
3138 nvlist_free(errors);
3139 nvlist_free(nvl);
3140 return (error);
3141 }
3142
3143 /*
3144 * inputs:
3145 * zc_name name of filesystem
3146 * zc_value name of property to inherit
3147 * zc_cookie revert to received value if TRUE
3148 *
3149 * outputs: none
3150 */
3151 static int
3152 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
3153 {
3154 const char *propname = zc->zc_value;
3155 zfs_prop_t prop = zfs_name_to_prop(propname);
3156 boolean_t received = zc->zc_cookie;
3157 zprop_source_t source = (received
3158 ? ZPROP_SRC_NONE /* revert to received value, if any */
3159 : ZPROP_SRC_INHERITED); /* explicitly inherit */
3160
3161 if (received) {
3162 nvlist_t *dummy;
3163 nvpair_t *pair;
3164 zprop_type_t type;
3165 int err;
3166
3167 /*
3168 * zfs_prop_set_special() expects properties in the form of an
3169 * nvpair with type info.
3170 */
3171 if (prop == ZPROP_INVAL) {
3172 if (!zfs_prop_user(propname))
3173 return (SET_ERROR(EINVAL));
3174
3175 type = PROP_TYPE_STRING;
3176 } else if (prop == ZFS_PROP_VOLSIZE ||
3177 prop == ZFS_PROP_VERSION) {
3178 return (SET_ERROR(EINVAL));
3179 } else {
3180 type = zfs_prop_get_type(prop);
3181 }
3182
3183 VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
3184
3185 switch (type) {
3186 case PROP_TYPE_STRING:
3187 VERIFY(0 == nvlist_add_string(dummy, propname, ""));
3188 break;
3189 case PROP_TYPE_NUMBER:
3190 case PROP_TYPE_INDEX:
3191 VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
3192 break;
3193 default:
3194 nvlist_free(dummy);
3195 return (SET_ERROR(EINVAL));
3196 }
3197
3198 pair = nvlist_next_nvpair(dummy, NULL);
3199 err = zfs_prop_set_special(zc->zc_name, source, pair);
3200 nvlist_free(dummy);
3201 if (err != -1)
3202 return (err); /* special property already handled */
3203 } else {
3204 /*
3205 * Only check this in the non-received case. We want to allow
3206 * 'inherit -S' to revert non-inheritable properties like quota
3207 * and reservation to the received or default values even though
3208 * they are not considered inheritable.
3209 */
3210 if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
3211 return (SET_ERROR(EINVAL));
3212 }
3213
3214 /* property name has been validated by zfs_secpolicy_inherit_prop() */
3215 return (dsl_prop_inherit(zc->zc_name, zc->zc_value, source));
3216 }
3217
3218 static int
3219 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
3220 {
3221 nvlist_t *props;
3222 spa_t *spa;
3223 int error;
3224 nvpair_t *pair;
3225 nvlist_t *event;
3226 if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3227 zc->zc_iflags, &props))
3228 return (error);
3229
3230 /*
3231 * If the only property is the configfile, then just do a spa_lookup()
3232 * to handle the faulted case.
3233 */
3234 pair = nvlist_next_nvpair(props, NULL);
3235 if (pair != NULL && strcmp(nvpair_name(pair),
3236 zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
3237 nvlist_next_nvpair(props, pair) == NULL) {
3238 mutex_enter(&spa_namespace_lock);
3239 if ((spa = spa_lookup(zc->zc_name)) != NULL) {
3240 spa_configfile_set(spa, props, B_FALSE);
3241 spa_config_sync(spa, B_FALSE, B_TRUE);
3242 }
3243 mutex_exit(&spa_namespace_lock);
3244 if (spa != NULL) {
3245 nvlist_free(props);
3246 return (0);
3247 }
3248 }
3249
3250 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
3251 nvlist_free(props);
3252 return (error);
3253 }
3254
3255 error = spa_prop_set(spa, props);
3256
3257 if (error == 0) {
3258 event = fnvlist_alloc();
3259 fnvlist_add_string(event, "pool", zc->zc_name);
3260 fnvlist_add_nvlist(event, "props", props);
3261 zfs_event_post(ZPOOL_EC_STATUS, "set", event);
3262 }
3263
3264 nvlist_free(props);
3265 spa_close(spa, FTAG);
3266
3267 return (error);
3268 }
3269
3270 static int
3271 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
3272 {
3273 spa_t *spa;
3274 int error;
3275 nvlist_t *nvp = NULL;
3276
3277 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
3278 /*
3279 * If the pool is faulted, there may be properties we can still
3280 * get (such as altroot and cachefile), so attempt to get them
3281 * anyway.
3282 */
3283 mutex_enter(&spa_namespace_lock);
3284 if ((spa = spa_lookup(zc->zc_name)) != NULL)
3285 error = spa_prop_get(spa, &nvp);
3286 mutex_exit(&spa_namespace_lock);
3287 } else {
3288 error = spa_prop_get(spa, &nvp);
3289 spa_close(spa, FTAG);
3290 }
3291
3292 if (error == 0 && zc->zc_nvlist_dst != NULL)
3293 error = put_nvlist(zc, nvp);
3294 else
3295 error = SET_ERROR(EFAULT);
3296
3297 nvlist_free(nvp);
3298 return (error);
3299 }
3300
3301 /*
3302 * inputs:
3303 * zc_name name of filesystem
3304 * zc_nvlist_src{_size} nvlist of delegated permissions
3305 * zc_perm_action allow/unallow flag
3306 *
3307 * outputs: none
3308 */
3309 static int
3310 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
3311 {
3312 int error;
3313 nvlist_t *fsaclnv = NULL;
3314
3315 if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3316 zc->zc_iflags, &fsaclnv)) != 0)
3317 return (error);
3318
3319 /*
3320 * Verify nvlist is constructed correctly
3321 */
3322 if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
3323 nvlist_free(fsaclnv);
3324 return (SET_ERROR(EINVAL));
3325 }
3326
3327 /*
3328 * If we don't have PRIV_SYS_MOUNT, then validate
3329 * that user is allowed to hand out each permission in
3330 * the nvlist(s)
3331 */
3332
3333 error = secpolicy_zfs(CRED());
3334 if (error != 0) {
3335 if (zc->zc_perm_action == B_FALSE) {
3336 error = dsl_deleg_can_allow(zc->zc_name,
3337 fsaclnv, CRED());
3338 } else {
3339 error = dsl_deleg_can_unallow(zc->zc_name,
3340 fsaclnv, CRED());
3341 }
3342 }
3343
3344 if (error == 0)
3345 error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
3346
3347 nvlist_free(fsaclnv);
3348 return (error);
3349 }
3350
3351 /*
3352 * inputs:
3353 * zc_name name of filesystem
3354 *
3355 * outputs:
3356 * zc_nvlist_src{_size} nvlist of delegated permissions
3357 */
3358 static int
3359 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
3360 {
3361 nvlist_t *nvp;
3362 int error;
3363
3364 if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
3365 error = put_nvlist(zc, nvp);
3366 nvlist_free(nvp);
3367 }
3368
3369 return (error);
3370 }
3371
3372 /* ARGSUSED */
3373 static void
3374 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
3375 {
3376 zfs_creat_t *zct = arg;
3377
3378 zfs_create_fs(os, cr, zct->zct_zplprops, tx);
3379 }
3380
3381 #define ZFS_PROP_UNDEFINED ((uint64_t)-1)
3382
3383 /*
3384 * inputs:
 * os			parent objset pointer (NULL if root fs)
 * default_zplver	zpl version to use if unspecified in createprops
 * fuids_ok		fuids allowed in this version of the spa?
 * sa_ok		SAs allowed in this version of the spa?
 * createprops		list of properties requested by creator
3392 *
3393 * outputs:
3394 * zplprops values for the zplprops we attach to the master node object
3395 * is_ci true if requested file system will be purely case-insensitive
3396 *
3397 * Determine the settings for utf8only, normalization and
3398 * casesensitivity. Specific values may have been requested by the
3399 * creator and/or we can inherit values from the parent dataset. If
3400 * the file system is of too early a vintage, a creator can not
3401 * request settings for these properties, even if the requested
3402 * setting is the default value. We don't actually want to create dsl
3403 * properties for these, so remove them from the source nvlist after
3404 * processing.
3405 */
3406 static int
3407 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
3408 boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
3409 nvlist_t *zplprops, boolean_t *is_ci)
3410 {
3411 uint64_t sense = ZFS_PROP_UNDEFINED;
3412 uint64_t norm = ZFS_PROP_UNDEFINED;
3413 uint64_t u8 = ZFS_PROP_UNDEFINED;
3414
3415 ASSERT(zplprops != NULL);
3416
3417 if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
3418 return (SET_ERROR(EINVAL));
3419
3420 /*
3421 * Pull out creator prop choices, if any.
3422 */
3423 if (createprops) {
3424 (void) nvlist_lookup_uint64(createprops,
3425 zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
3426 (void) nvlist_lookup_uint64(createprops,
3427 zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
3428 (void) nvlist_remove_all(createprops,
3429 zfs_prop_to_name(ZFS_PROP_NORMALIZE));
3430 (void) nvlist_lookup_uint64(createprops,
3431 zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3432 (void) nvlist_remove_all(createprops,
3433 zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3434 (void) nvlist_lookup_uint64(createprops,
3435 zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3436 (void) nvlist_remove_all(createprops,
3437 zfs_prop_to_name(ZFS_PROP_CASE));
3438 }
3439
3440 /*
3441 * If the zpl version requested is whacky or the file system
	 * or pool version is too "young" to support normalization
3443 * and the creator tried to set a value for one of the props,
3444 * error out.
3445 */
3446 if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3447 (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3448 (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3449 (zplver < ZPL_VERSION_NORMALIZATION &&
3450 (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3451 sense != ZFS_PROP_UNDEFINED)))
3452 return (SET_ERROR(ENOTSUP));
3453
3454 /*
3455 * Put the version in the zplprops
3456 */
3457 VERIFY(nvlist_add_uint64(zplprops,
3458 zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
3459
3460 if (norm == ZFS_PROP_UNDEFINED)
3461 VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
3462 VERIFY(nvlist_add_uint64(zplprops,
3463 zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
3464
3465 if (os) {
3466 if (zfs_is_wormed_ds(dmu_objset_ds(os)))
3467 return (SET_ERROR(EPERM));
3468 }
3469
3470 /*
3471 * If we're normalizing, names must always be valid UTF-8 strings.
3472 */
3473 if (norm)
3474 u8 = 1;
3475 if (u8 == ZFS_PROP_UNDEFINED)
3476 VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
3477 VERIFY(nvlist_add_uint64(zplprops,
3478 zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
3479
3480 if (sense == ZFS_PROP_UNDEFINED)
3481 VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
3482 VERIFY(nvlist_add_uint64(zplprops,
3483 zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
3484
3485 if (is_ci)
3486 *is_ci = (sense == ZFS_CASE_INSENSITIVE);
3487
3488 return (0);
3489 }
3490
3491 static int
3492 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3493 nvlist_t *zplprops, boolean_t *is_ci)
3494 {
3495 boolean_t fuids_ok, sa_ok;
3496 uint64_t zplver = ZPL_VERSION;
3497 objset_t *os = NULL;
3498 char parentname[ZFS_MAX_DATASET_NAME_LEN];
3499 char *cp;
3500 spa_t *spa;
3501 uint64_t spa_vers;
3502 int error;
3503
3504 (void) strlcpy(parentname, dataset, sizeof (parentname));
3505 cp = strrchr(parentname, '/');
3506 ASSERT(cp != NULL);
3507 cp[0] = '\0';
3508
3509 if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3510 return (error);
3511
3512 spa_vers = spa_version(spa);
3513 spa_close(spa, FTAG);
3514
3515 zplver = zfs_zpl_version_map(spa_vers);
3516 fuids_ok = (zplver >= ZPL_VERSION_FUID);
3517 sa_ok = (zplver >= ZPL_VERSION_SA);
3518
3519 /*
3520 * Open parent object set so we can inherit zplprop values.
3521 */
3522 if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3523 return (error);
3524
3525 error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3526 zplprops, is_ci);
3527 dmu_objset_rele(os, FTAG);
3528 return (error);
3529 }
3530
3531 static int
3532 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3533 nvlist_t *zplprops, boolean_t *is_ci)
3534 {
3535 boolean_t fuids_ok;
3536 boolean_t sa_ok;
3537 uint64_t zplver = ZPL_VERSION;
3538 int error;
3539
3540 zplver = zfs_zpl_version_map(spa_vers);
3541 fuids_ok = (zplver >= ZPL_VERSION_FUID);
3542 sa_ok = (zplver >= ZPL_VERSION_SA);
3543
3544 error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3545 createprops, zplprops, is_ci);
3546 return (error);
3547 }
3548
3549 /*
3550 * innvl: {
3551 * "type" -> dmu_objset_type_t (int32)
3552 * (optional) "props" -> { prop -> value }
3553 * }
3554 *
3555 * outnvl: propname -> error code (int32)
3556 */
3557 static int
3558 zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3559 {
3560 int error = 0;
3561 zfs_creat_t zct = { 0 };
3562 nvlist_t *nvprops = NULL;
3563 void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3564 int32_t type32;
3565 dmu_objset_type_t type;
3566 boolean_t is_insensitive = B_FALSE;
3567 char parent[MAXNAMELEN];
3568 nvlist_t *event;
3569
3570 if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
3571 return (SET_ERROR(EINVAL));
3572 type = type32;
3573 (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3574
3575 switch (type) {
3576 case DMU_OST_ZFS:
3577 cbfunc = zfs_create_cb;
3578 break;
3579
3580 case DMU_OST_ZVOL:
3581 cbfunc = zvol_create_cb;
3582 break;
3583
3584 default:
3585 cbfunc = NULL;
3586 break;
3587 }
3588 if (strchr(fsname, '@') ||
3589 strchr(fsname, '%'))
3590 return (SET_ERROR(EINVAL));
3591
3592 zct.zct_props = nvprops;
3593
3594 if (cbfunc == NULL)
3595 return (SET_ERROR(EINVAL));
3596
3597 if (zfs_get_parent(fsname, parent, MAXNAMELEN) == 0 &&
3598 zfs_is_wormed(parent)) {
3599 return (SET_ERROR(EPERM));
3600 }
3601
3602 if (type == DMU_OST_ZVOL) {
3603 uint64_t volsize, volblocksize;
3604
3605 if (nvprops == NULL)
3606 return (SET_ERROR(EINVAL));
3607 if (nvlist_lookup_uint64(nvprops,
3608 zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3609 return (SET_ERROR(EINVAL));
3610
3611 if ((error = nvlist_lookup_uint64(nvprops,
3612 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3613 &volblocksize)) != 0 && error != ENOENT)
3614 return (SET_ERROR(EINVAL));
3615
3616 if (error != 0)
3617 volblocksize = zfs_prop_default_numeric(
3618 ZFS_PROP_VOLBLOCKSIZE);
3619
3620 if ((error = zvol_check_volblocksize(
3621 volblocksize)) != 0 ||
3622 (error = zvol_check_volsize(volsize,
3623 volblocksize)) != 0)
3624 return (error);
3625 } else if (type == DMU_OST_ZFS) {
3626 /*
3627 * We have to have normalization and
3628 * case-folding flags correct when we do the
3629 * file system creation, so go figure them out
3630 * now.
3631 */
3632 VERIFY(nvlist_alloc(&zct.zct_zplprops,
3633 NV_UNIQUE_NAME, KM_SLEEP) == 0);
3634 error = zfs_fill_zplprops(fsname, nvprops,
3635 zct.zct_zplprops, &is_insensitive);
3636 if (error != 0) {
3637 nvlist_free(zct.zct_zplprops);
3638 return (error);
3639 }
3640 }
3641
3642 error = dmu_objset_create(fsname, type,
3643 is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
3644 nvlist_free(zct.zct_zplprops);
3645
3646 /*
3647 * It would be nice to do this atomically.
3648 */
3649 if (error == 0) {
3650 error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3651 nvprops, outnvl);
3652 if (error != 0)
3653 (void) dsl_destroy_head(fsname);
3654 }
3655
3656 if (error == 0) {
3657 event = fnvlist_alloc();
3658 fnvlist_add_string(event, "fsname", fsname);
3659 fnvlist_add_int32(event, "type", type);
3660 if (nvprops != NULL)
3661 fnvlist_add_nvlist(event, "properties", nvprops);
3662 zfs_event_post(ZFS_EC_STATUS, "create", event);
3663 }
3664
3665 return (error);
3666 }
3667
3668 /*
3669 * innvl: {
3670 * "origin" -> name of origin snapshot
3671 * (optional) "props" -> { prop -> value }
3672 * }
3673 *
3674 * outnvl: propname -> error code (int32)
3675 */
3676 static int
3677 zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3678 {
3679 int error = 0;
3680 nvlist_t *nvprops = NULL;
3681 char *origin_name, *origin_snap;
3682 nvlist_t *event;
3683
3684 if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0)
3685 return (SET_ERROR(EINVAL));
3686
3687 origin_snap = strchr(origin_name, '@');
3688 if (!origin_snap)
3689 return (SET_ERROR(EINVAL));
3690
3691 if (autosnap_check_name(origin_snap))
3692 return (SET_ERROR(EPERM));
3693
3694 (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3695
3696 if (strchr(fsname, '@') ||
3697 strchr(fsname, '%'))
3698 return (SET_ERROR(EINVAL));
3699
3700 if (dataset_namecheck(origin_name, NULL, NULL) != 0)
3701 return (SET_ERROR(EINVAL));
3702
3703 error = dmu_objset_clone(fsname, origin_name);
3704 if (error != 0)
3705 return (error);
3706
3707 /*
3708 * It would be nice to do this atomically.
3709 */
3710 if (error == 0) {
3711 error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3712 nvprops, outnvl);
3713 if (error != 0)
3714 (void) dsl_destroy_head(fsname);
3715 }
3716
3717 if (error == 0) {
3718 event = fnvlist_alloc();
3719 fnvlist_add_string(event, "origin", origin_name);
3720 fnvlist_add_string(event, "fsname", fsname);
3721 if (nvprops != NULL)
3722 fnvlist_add_nvlist(event, "properties", nvprops);
3723 zfs_event_post(ZFS_EC_STATUS, "clone", event);
3724 }
3725
3726 return (error);
3727 }
3728
3729 /*
3730 * innvl: {
3731 * "snaps" -> { snapshot1, snapshot2 }
3732 * (optional) "props" -> { prop -> value (string) }
3733 * }
3734 *
3735 * outnvl: snapshot -> error code (int32)
3736 */
3737 static int
3738 zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3739 {
3740 nvlist_t *snaps;
3741 nvlist_t *props = NULL;
3742 int error, poollen;
3743 nvpair_t *pair;
3744 nvlist_t *event;
3745
3746 (void) nvlist_lookup_nvlist(innvl, "props", &props);
3747 if ((error = zfs_check_userprops(poolname, props)) != 0)
3748 return (error);
3749
3750 if (!nvlist_empty(props) &&
3751 zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
3752 return (SET_ERROR(ENOTSUP));
3753
3754 if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3755 return (SET_ERROR(EINVAL));
3756 poollen = strlen(poolname);
3757 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3758 pair = nvlist_next_nvpair(snaps, pair)) {
3759 const char *name = nvpair_name(pair);
3760 const char *cp = strchr(name, '@');
3761
3762 /*
3763 * The snap name must contain an @, and the part after it must
3764 * contain only valid characters.
3765 */
3766 if (cp == NULL ||
3767 zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3768 return (SET_ERROR(EINVAL));
3769
3770 if (autosnap_check_name(cp))
3771 return (EINVAL);
3772
3773 /*
3774 * The snap must be in the specified pool.
3775 */
3776 if (strncmp(name, poolname, poollen) != 0 ||
3777 (name[poollen] != '/' && name[poollen] != '@'))
3778 return (SET_ERROR(EXDEV));
3779
3780 /* This must be the only snap of this fs. */
3781 for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
3782 pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
3783 if (strncmp(name, nvpair_name(pair2), cp - name + 1)
3784 == 0) {
3785 return (SET_ERROR(EXDEV));
3786 }
3787 }
3788 }
3789
3790 error = dsl_dataset_snapshot(snaps, props, outnvl);
3791
3792 event = fnvlist_alloc();
3793 fnvlist_add_nvlist(event, "snaps", snaps);
3794 fnvlist_add_nvlist(event, "errors", outnvl);
3795 fnvlist_add_string(event, "pool", poolname);
3796 zfs_event_post(ZFS_EC_STATUS, "snapshot", event);
3797
3798 return (error);
3799 }
3800
3801 /*
3802 * innvl: "message" -> string
3803 */
3804 /* ARGSUSED */
3805 static int
3806 zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
3807 {
3808 char *message;
3809 spa_t *spa;
3810 int error;
3811 char *poolname;
3812
3813 /*
	 * The poolname in the ioctl is not set; we get it from the TSD,
3815 * which was set at the end of the last successful ioctl that allows
3816 * logging. The secpolicy func already checked that it is set.
3817 * Only one log ioctl is allowed after each successful ioctl, so
3818 * we clear the TSD here.
3819 */
3820 poolname = tsd_get(zfs_allow_log_key);
3821 (void) tsd_set(zfs_allow_log_key, NULL);
3822 error = spa_open(poolname, &spa, FTAG);
3823 strfree(poolname);
3824 if (error != 0)
3825 return (error);
3826
3827 if (nvlist_lookup_string(innvl, "message", &message) != 0) {
3828 spa_close(spa, FTAG);
3829 return (SET_ERROR(EINVAL));
3830 }
3831
3832 if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
3833 spa_close(spa, FTAG);
3834 return (SET_ERROR(ENOTSUP));
3835 }
3836
3837 error = spa_history_log(spa, message);
3838 spa_close(spa, FTAG);
3839 return (error);
3840 }
3841
3842 /*
3843 * The dp_config_rwlock must not be held when calling this, because the
3844 * unmount may need to write out data.
3845 *
3846 * This function is best-effort. Callers must deal gracefully if it
3847 * remains mounted (or is remounted after this call).
3848 *
3849 * Returns 0 if the argument is not a snapshot, or it is not currently a
3850 * filesystem, or we were able to unmount it. Returns error code otherwise.
3851 */
3852 void
3853 zfs_unmount_snap(const char *snapname)
3854 {
3855 vfs_t *vfsp = NULL;
3856 zfsvfs_t *zfsvfs = NULL;
3857
3858 if (strchr(snapname, '@') == NULL)
3859 return;
3860
3861 int err = getzfsvfs(snapname, &zfsvfs);
3862 if (err != 0) {
3863 ASSERT3P(zfsvfs, ==, NULL);
3864 return;
3865 }
3866 vfsp = zfsvfs->z_vfs;
3867
3868 ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os)));
3869
3870 err = vn_vfswlock(vfsp->vfs_vnodecovered);
3871 VFS_RELE(vfsp);
3872 if (err != 0)
3873 return;
3874
3875 /*
3876 * Always force the unmount for snapshots.
3877 */
3878 (void) dounmount(vfsp, MS_FORCE, kcred);
3879 }
3880
3881 /* ARGSUSED */
3882 static int
3883 zfs_unmount_snap_cb(const char *snapname, void *arg)
3884 {
3885 zfs_unmount_snap(snapname);
3886 return (0);
3887 }
3888
3889 /*
3890 * When a clone is destroyed, its origin may also need to be destroyed,
3891 * in which case it must be unmounted. This routine will do that unmount
3892 * if necessary.
3893 */
3894 void
3895 zfs_destroy_unmount_origin(const char *fsname)
3896 {
3897 int error;
3898 objset_t *os;
3899 dsl_dataset_t *ds;
3900
3901 error = dmu_objset_hold(fsname, FTAG, &os);
3902 if (error != 0)
3903 return;
3904 ds = dmu_objset_ds(os);
3905 if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
3906 char originname[ZFS_MAX_DATASET_NAME_LEN];
3907 dsl_dataset_name(ds->ds_prev, originname);
3908 dmu_objset_rele(os, FTAG);
3909 zfs_unmount_snap(originname);
3910 } else {
3911 dmu_objset_rele(os, FTAG);
3912 }
3913 }
3914
3915 static int
3916 zfs_destroy_check_autosnap(spa_t *spa, const char *name)
3917 {
3918 const char *snap = strchr(name, '@');
3919
3920 if (snap == NULL)
3921 return (EINVAL);
3922
3923 if (autosnap_check_name(snap)) {
3924 int err = autosnap_check_for_destroy(
3925 spa_get_autosnap(spa), name);
3926
3927 if (err != 0)
3928 return (EBUSY);
3929 }
3930
3931 return (0);
3932 }
3933
3934 /*
3935 * innvl: {
3936 * "snaps" -> { snapshot1, snapshot2 }
3937 * (optional boolean) "defer"
3938 * }
3939 *
3940 * outnvl: snapshot -> error code (int32)
3941 *
3942 */
3943 /* ARGSUSED */
3944 static int
3945 zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3946 {
3947 nvlist_t *snaps;
3948 nvpair_t *pair;
3949 boolean_t defer;
3950 int error = 0;
3951 nvlist_t *event;
3952 spa_t *spa;
3953
3954 if (zfs_is_wormed(poolname))
3955 return (SET_ERROR(EPERM));
3956
3957 if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3958 return (SET_ERROR(EINVAL));
3959 defer = nvlist_exists(innvl, "defer");
3960
	error = spa_open(poolname, &spa, FTAG);
	if (error != 0)
		return (error);
3964
3965 for (pair = nvlist_next_nvpair(snaps, NULL);
3966 pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) {
3967 error = zfs_destroy_check_autosnap(spa, nvpair_name(pair));
3968 if (error)
3969 fnvlist_add_int32(outnvl, nvpair_name(pair), error);
3970 }
3971
3972 spa_close(spa, FTAG);
3973
3974 if (error)
3975 return (error);
3976
3977 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3978 pair = nvlist_next_nvpair(snaps, pair)) {
3979 zfs_unmount_snap(nvpair_name(pair));
3980 }
3981
3982 error = dsl_destroy_snapshots_nvl(snaps, defer, outnvl);
3983
3984 if (error == 0) {
3985 event = fnvlist_alloc();
3986 fnvlist_add_nvlist(event, "snaps", snaps);
3987 fnvlist_add_nvlist(event, "errors", outnvl);
3988 zfs_event_post(ZFS_EC_STATUS, "destroy_snaps", event);
3989 }
3990
3991 return (error);
3992 }
3993
3994 /*
3995 * Create bookmarks. Bookmark names are of the form <fs>#<bmark>.
3996 * All bookmarks must be in the same pool.
3997 *
3998 * innvl: {
3999 * bookmark1 -> snapshot1, bookmark2 -> snapshot2
4000 * }
4001 *
4002 * outnvl: bookmark -> error code (int32)
4003 *
4004 */
4005 /* ARGSUSED */
4006 static int
4007 zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4008 {
4009 for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
4010 pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
4011 char *snap_name;
4012
4013 /*
4014 * Verify the snapshot argument.
4015 */
4016 if (nvpair_value_string(pair, &snap_name) != 0)
4017 return (SET_ERROR(EINVAL));
4018
4019
4020 /* Verify that the keys (bookmarks) are unique */
4021 for (nvpair_t *pair2 = nvlist_next_nvpair(innvl, pair);
4022 pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
4023 if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
4024 return (SET_ERROR(EINVAL));
4025 }
4026 }
4027
4028 return (dsl_bookmark_create(innvl, outnvl));
4029 }
4030
4031 /*
4032 * innvl: {
4033 * property 1, property 2, ...
4034 * }
4035 *
4036 * outnvl: {
4037 * bookmark name 1 -> { property 1, property 2, ... },
4038 * bookmark name 2 -> { property 1, property 2, ... }
4039 * }
4040 *
4041 */
4042 static int
4043 zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
4044 {
4045 return (dsl_get_bookmarks(fsname, innvl, outnvl));
4046 }
4047
4048 /*
4049 * innvl: {
4050 * bookmark name 1, bookmark name 2
4051 * }
4052 *
4053 * outnvl: bookmark -> error code (int32)
4054 *
4055 */
4056 static int
4057 zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
4058 nvlist_t *outnvl)
4059 {
4060 int error, poollen;
4061
4062 poollen = strlen(poolname);
4063 for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
4064 pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
4065 const char *name = nvpair_name(pair);
4066 const char *cp = strchr(name, '#');
4067
4068 /*
4069 * The bookmark name must contain an #, and the part after it
4070 * must contain only valid characters.
4071 */
4072 if (cp == NULL ||
4073 zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
4074 return (SET_ERROR(EINVAL));
4075
4076 /*
4077 * The bookmark must be in the specified pool.
4078 */
4079 if (strncmp(name, poolname, poollen) != 0 ||
4080 (name[poollen] != '/' && name[poollen] != '#'))
4081 return (SET_ERROR(EXDEV));
4082 }
4083
4084 error = dsl_bookmark_destroy(innvl, outnvl);
4085 return (error);
4086 }
4087
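/*
 * innvl: {
 *	ZCP_ARG_PROGRAM -> program text (string)
 *	(optional) ZCP_ARG_SYNC -> run in syncing context (boolean)
 *	(optional) ZCP_ARG_INSTRLIMIT -> instruction limit (uint64)
 *	(optional) ZCP_ARG_MEMLIMIT -> memory limit (uint64)
 *	ZCP_ARG_ARGLIST -> arguments passed to the program
 * }
 *
 * outnvl: filled in by zcp_eval() with the program's result and/or error
 * information
 */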
4088 static int
4089 zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
4090 nvlist_t *outnvl)
4091 {
4092 char *program;
4093 uint64_t instrlimit, memlimit;
4094 boolean_t sync_flag;
4095 nvpair_t *nvarg = NULL;
4096
4097 if (0 != nvlist_lookup_string(innvl, ZCP_ARG_PROGRAM, &program)) {
4098 return (EINVAL);
4099 }
4100 if (0 != nvlist_lookup_boolean_value(innvl, ZCP_ARG_SYNC, &sync_flag)) {
4101 sync_flag = B_TRUE;
4102 }
4103 if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_INSTRLIMIT, &instrlimit)) {
4104 instrlimit = ZCP_DEFAULT_INSTRLIMIT;
4105 }
4106 if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_MEMLIMIT, &memlimit)) {
4107 memlimit = ZCP_DEFAULT_MEMLIMIT;
4108 }
4109 if (0 != nvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST, &nvarg)) {
4110 return (EINVAL);
4111 }
4112
4113 if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit)
4114 return (EINVAL);
4115 if (memlimit == 0 || memlimit > zfs_lua_max_memlimit)
4116 return (EINVAL);
4117
4118 return (zcp_eval(poolname, program, sync_flag, instrlimit, memlimit,
4119 nvarg, outnvl));
4120 }
4121
4122 /*
4123 * inputs:
4124 * zc_name name of dataset to destroy
4125 * zc_objset_type type of objset
4126 * zc_defer_destroy mark for deferred destroy
 * zc_guid		if set, do an atomic recursive destroy
4128 *
4129 * outputs: none
4130 */
4131 static int
4132 zfs_ioc_destroy(zfs_cmd_t *zc)
4133 {
4134 int err;
4135 nvlist_t *event;
4136
4137 if (zfs_is_wormed(zc->zc_name))
4138 return (SET_ERROR(EPERM));
4139
4140 if (zc->zc_objset_type == DMU_OST_ZFS)
4141 zfs_unmount_snap(zc->zc_name);
4142
4143 if (zc->zc_guid) {
4144 spa_t *spa;
4145
4146 if ((err = spa_open(zc->zc_name, &spa, FTAG)) != 0)
4147 return (err);
4148
4149 err = autosnap_lock(spa, RW_WRITER);
4150 if (err == 0) {
4151 err = wbc_walk_lock(spa);
4152 if (err != 0)
4153 autosnap_unlock(spa);
4154 }
4155
4156 if (err == 0) {
4157 err = dsl_destroy_atomically(zc->zc_name,
4158 zc->zc_defer_destroy);
4159 wbc_walk_unlock(spa);
4160 autosnap_unlock(spa);
4161 }
4162
4163 spa_close(spa, FTAG);
4164 } else {
4165 if (strchr(zc->zc_name, '@')) {
4166 spa_t *spa = NULL;
4167
4168 err = spa_open(zc->zc_name, &spa, FTAG);
4169 if (err != 0)
4170 return (err);
4171
4172 err = zfs_destroy_check_autosnap(spa, zc->zc_name);
4173 if (err == 0) {
4174 err = dsl_destroy_snapshot(zc->zc_name,
4175 zc->zc_defer_destroy);
4176 }
4177
4178 spa_close(spa, FTAG);
4179 } else {
4180 err = dsl_destroy_head(zc->zc_name);
4181 if (err == EEXIST) {
4182 /*
				 * It is possible that the given DS has a
				 * hidden child (%recv) dataset, a leftover
				 * from a previously interrupted
				 * 'zfs receive'.
4187 */
4188 char namebuf[ZFS_MAX_DATASET_NAME_LEN];
4189
4190 if (snprintf(namebuf, sizeof (namebuf),
4191 "%s/%%recv", zc->zc_name) >=
4192 sizeof (namebuf))
4193 return (err);
4194
4195 /* Try to remove the hidden child (%recv) */
4196 err = dsl_destroy_head(namebuf);
4197 if (err == 0) {
4198 /*
4199 * Now the given DS should not have
4200 * children, so we can try to remove
4201 * it again
4202 */
4203 err = dsl_destroy_head(zc->zc_name);
4204 } else if (err == ENOENT) {
4205 /*
4206 * The hidden child (%recv) does not
					 * exist, so we need to restore the
					 * original error.
4209 */
4210 err = EEXIST;
				}
			}
4214 }
4215 }
4216 if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
4217 (void) zvol_remove_minor(zc->zc_name);
4218
4219 if (err == 0) {
4220 event = fnvlist_alloc();
4221 fnvlist_add_string(event, "fsname", zc->zc_name);
4222 fnvlist_add_int32(event, "type", zc->zc_objset_type);
4223 zfs_event_post(ZFS_EC_STATUS, "destroy", event);
4224 }
4225
4226 return (err);
4227 }
4228
4229 /*
 * fsname is the name of the dataset to roll back (to its latest snapshot)
 *
 * innvl may contain the name of the expected target snapshot
 *
 * outnvl: {
 *     "target" -> name of most recent snapshot
 * }
4236 */
4237 /* ARGSUSED */
4238 static int
4239 zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
4240 {
4241 zfsvfs_t *zfsvfs;
4242 char *target = NULL;
4243 int error;
4244 nvlist_t *event;
4245 int resume_err = 0;
4246
4247 if (zfs_is_wormed(fsname))
4248 return (SET_ERROR(EPERM));
4249
4250 (void) nvlist_lookup_string(innvl, "target", &target);
4251 if (target != NULL) {
4252 const char *cp = strchr(target, '@');
4253
4254 /*
4255 * The snap name must contain an @, and the part after it must
4256 * contain only valid characters.
4257 */
4258 if (cp == NULL ||
4259 zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
4260 return (SET_ERROR(EINVAL));
4261 }
4262
4263 if (getzfsvfs(fsname, &zfsvfs) == 0) {
4264 dsl_dataset_t *ds;
4265
4266 ds = dmu_objset_ds(zfsvfs->z_os);
4267 error = zfs_suspend_fs(zfsvfs);
4268 if (error == 0) {
4269 error = dsl_dataset_rollback(fsname, target, zfsvfs,
4270 outnvl);
4271 resume_err = zfs_resume_fs(zfsvfs, ds);
4272 }
4273 VFS_RELE(zfsvfs->z_vfs);
4274 } else {
4275 error = dsl_dataset_rollback(fsname, target, NULL, outnvl);
4276 }
4277
4278 if (error == 0) {
4279 event = fnvlist_alloc();
		fnvlist_add_string(event, "target",
		    (target != NULL) ? target : "");
4281 fnvlist_add_string(event, "fsname", fsname);
4282 fnvlist_add_int32(event, "resume_err", resume_err);
4283 zfs_event_post(ZFS_EC_STATUS, "rollback", event);
4284 }
4285
4286 error = (error != 0) ? error : resume_err;
4287 return (error);
4288 }
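
/*
 * Illustrative sketch (not compiled), assuming the libzfs_core wrapper
 * lzc_rollback() of the same vintage, which leaves the optional innvl
 * "target" unset and simply reports which snapshot the dataset was rolled
 * back to.  The dataset name is hypothetical.
 *
 *	char target[ZFS_MAX_DATASET_NAME_LEN];
 *	int error;
 *
 *	error = lzc_rollback("pool/fs", target, sizeof (target));
 *	... on success, target holds the name of the snapshot that
 *	... "pool/fs" now matches
 */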
4289
4290 static int
4291 recursive_unmount(const char *fsname, void *arg)
4292 {
4293 const char *snapname = arg;
4294 char fullname[ZFS_MAX_DATASET_NAME_LEN];
4295
4296 (void) snprintf(fullname, sizeof (fullname), "%s@%s", fsname, snapname);
4297 zfs_unmount_snap(fullname);
4298
4299 return (0);
4300 }
4301
4302 /*
4303 * inputs:
4304 * zc_name old name of dataset
4305 * zc_value new name of dataset
4306 * zc_cookie recursive flag (only valid for snapshots)
4307 *
4308 * outputs: none
4309 */
4310 static int
4311 zfs_ioc_rename(zfs_cmd_t *zc)
4312 {
4313 boolean_t recursive = zc->zc_cookie & 1;
4314 char *at;
4315 nvlist_t *event;
4316 int error;
4317
4318 if (zfs_is_wormed(zc->zc_name))
4319 return (SET_ERROR(EPERM));
4320
4321 zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
4322 if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
4323 strchr(zc->zc_value, '%'))
4324 return (SET_ERROR(EINVAL));
4325
4326 at = strchr(zc->zc_name, '@');
4327 if (at != NULL) {
4328 /* snaps must be in same fs */
4329
4330 if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
4331 return (SET_ERROR(EXDEV));
4332 *at = '\0';
4333 if (zc->zc_objset_type == DMU_OST_ZFS) {
4334 error = dmu_objset_find(zc->zc_name,
4335 recursive_unmount, at + 1,
4336 recursive ? DS_FIND_CHILDREN : 0);
4337 if (error != 0) {
4338 *at = '@';
4339 return (error);
4340 }
4341 }
4342 error = dsl_dataset_rename_snapshot(zc->zc_name,
4343 at + 1, strchr(zc->zc_value, '@') + 1, recursive);
4344 *at = '@';
4345
4346 } else {
4347 if (zc->zc_objset_type == DMU_OST_ZVOL)
4348 (void) zvol_remove_minor(zc->zc_name);
4349 error = dsl_dir_rename(zc->zc_name, zc->zc_value);
4350 }
4351
4352 if (error == 0) {
4353 event = fnvlist_alloc();
4354 fnvlist_add_string(event, "origin", zc->zc_name);
4355 fnvlist_add_string(event, "fsname", zc->zc_value);
4356 fnvlist_add_int32(event, "type", zc->zc_objset_type);
4357 zfs_event_post(ZFS_EC_STATUS, "rename", event);
4358 }
4359
4360 return (error);
4361 }
4362
4363 static int
4364 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
4365 {
4366 const char *propname = nvpair_name(pair);
4367 boolean_t issnap = (strchr(dsname, '@') != NULL);
4368 zfs_prop_t prop = zfs_name_to_prop(propname);
4369 uint64_t intval;
4370 int err;
4371
4372 if (prop == ZPROP_INVAL) {
4373 if (zfs_prop_user(propname)) {
4374 if (err = zfs_secpolicy_write_perms(dsname,
4375 ZFS_DELEG_PERM_USERPROP, cr))
4376 return (err);
4377 return (0);
4378 }
4379
4380 if (!issnap && zfs_prop_userquota(propname)) {
4381 const char *perm = NULL;
4382 const char *uq_prefix =
4383 zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
4384 const char *gq_prefix =
4385 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
4386
4387 if (strncmp(propname, uq_prefix,
4388 strlen(uq_prefix)) == 0) {
4389 perm = ZFS_DELEG_PERM_USERQUOTA;
4390 } else if (strncmp(propname, gq_prefix,
4391 strlen(gq_prefix)) == 0) {
4392 perm = ZFS_DELEG_PERM_GROUPQUOTA;
4393 } else {
4394 /* USERUSED and GROUPUSED are read-only */
4395 return (SET_ERROR(EINVAL));
4396 }
4397
4398 if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
4399 return (err);
4400 return (0);
4401 }
4402
4403 return (SET_ERROR(EINVAL));
4404 }
4405
4406 if (issnap)
4407 return (SET_ERROR(EINVAL));
4408
4409 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
4410 /*
4411 * dsl_prop_get_all_impl() returns properties in this
4412 * format.
4413 */
4414 nvlist_t *attrs;
4415 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
4416 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4417 &pair) == 0);
4418 }
4419
4420 /*
4421 * Check that this value is valid for this pool version
4422 */
4423 switch (prop) {
4424 case ZFS_PROP_COMPRESSION:
4425 /*
4426 * If the user specified gzip compression, make sure
4427 * the SPA supports it. We ignore any errors here since
4428 * we'll catch them later.
4429 */
4430 if (nvpair_value_uint64(pair, &intval) == 0) {
4431 if (intval >= ZIO_COMPRESS_GZIP_1 &&
4432 intval <= ZIO_COMPRESS_GZIP_9 &&
4433 zfs_earlier_version(dsname,
4434 SPA_VERSION_GZIP_COMPRESSION)) {
4435 return (SET_ERROR(ENOTSUP));
4436 }
4437
4438 if (intval == ZIO_COMPRESS_ZLE &&
4439 zfs_earlier_version(dsname,
4440 SPA_VERSION_ZLE_COMPRESSION))
4441 return (SET_ERROR(ENOTSUP));
4442
4443 if (intval == ZIO_COMPRESS_LZ4) {
4444 spa_t *spa;
4445
4446 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4447 return (err);
4448
4449 if (!spa_feature_is_enabled(spa,
4450 SPA_FEATURE_LZ4_COMPRESS)) {
4451 spa_close(spa, FTAG);
4452 return (SET_ERROR(ENOTSUP));
4453 }
4454 spa_close(spa, FTAG);
4455 }
4456
4457 /*
4458 * If this is a bootable dataset then
4459 * verify that the compression algorithm
4460 * is supported for booting. We must return
4461 * something other than ENOTSUP since it
4462 * implies a downrev pool version.
4463 */
4464 if (zfs_is_bootfs(dsname) &&
4465 !BOOTFS_COMPRESS_VALID(intval)) {
4466 return (SET_ERROR(ERANGE));
4467 }
4468 }
4469 break;
4470
4471 case ZFS_PROP_COPIES:
4472 if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
4473 return (SET_ERROR(ENOTSUP));
4474 break;
4475
4476 case ZFS_PROP_RECORDSIZE:
4477 /* Record sizes above 128k need the feature to be enabled */
4478 if (nvpair_value_uint64(pair, &intval) == 0 &&
4479 intval > SPA_OLD_MAXBLOCKSIZE) {
4480 spa_t *spa;
4481
4482 /*
4483 * We don't allow setting the property above 1MB,
4484 * unless the tunable has been changed.
4485 */
4486 if (intval > zfs_max_recordsize ||
4487 intval > SPA_MAXBLOCKSIZE)
4488 return (SET_ERROR(ERANGE));
4489
4490 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4491 return (err);
4492
4493 if (!spa_feature_is_enabled(spa,
4494 SPA_FEATURE_LARGE_BLOCKS)) {
4495 spa_close(spa, FTAG);
4496 return (SET_ERROR(ENOTSUP));
4497 }
4498 spa_close(spa, FTAG);
4499 }
4500 break;
4501
4502 case ZFS_PROP_SHARESMB:
4503 if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
4504 return (SET_ERROR(ENOTSUP));
4505 break;
4506
4507 case ZFS_PROP_ACLINHERIT:
4508 if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
4509 nvpair_value_uint64(pair, &intval) == 0) {
4510 if (intval == ZFS_ACL_PASSTHROUGH_X &&
4511 zfs_earlier_version(dsname,
4512 SPA_VERSION_PASSTHROUGH_X))
4513 return (SET_ERROR(ENOTSUP));
4514 }
4515 break;
4516
4517 case ZFS_PROP_WBC_MODE:
4518 {
		spa_t *spa;

		if ((err = spa_open(dsname, &spa, FTAG)) != 0)
			return (err);

		/* WBC cannot be used without a special vdev */
		if (!spa_feature_is_enabled(spa, SPA_FEATURE_WBC) ||
		    !spa_has_special(spa)) {
			spa_close(spa, FTAG);
			return (SET_ERROR(EKZFS_WBCNOTSUP));
		}

		/*
		 * Avoid races: on import or after a reboot, WBC performs
		 * its registration asynchronously, so refuse the property
		 * change until it is ready.
		 */
		if (!spa->spa_wbc.wbc_ready_to_use) {
			spa_close(spa, FTAG);
			return (SET_ERROR(EBUSY));
		}

		spa_close(spa, FTAG);
4540 }
4541 break;
4542
4543 case ZFS_PROP_CHECKSUM:
4544 case ZFS_PROP_DEDUP:
4545 {
4546 spa_feature_t feature;
4547 spa_t *spa;
4548
4549 /* dedup feature version checks */
4550 if (prop == ZFS_PROP_DEDUP &&
4551 zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
4552 return (SET_ERROR(ENOTSUP));
4553
4554 if (nvpair_value_uint64(pair, &intval) != 0)
4555 return (SET_ERROR(EINVAL));
4556
4557 /* check prop value is enabled in features */
4558 feature = zio_checksum_to_feature(intval & ZIO_CHECKSUM_MASK);
4559 if (feature == SPA_FEATURE_NONE)
4560 break;
4561
4562 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4563 return (err);
4564 /*
4565 * Salted checksums are not supported on root pools.
4566 */
4567 if (spa_bootfs(spa) != 0 &&
4568 intval < ZIO_CHECKSUM_FUNCTIONS &&
4569 (zio_checksum_table[intval].ci_flags &
4570 ZCHECKSUM_FLAG_SALTED)) {
4571 spa_close(spa, FTAG);
4572 return (SET_ERROR(ERANGE));
4573 }
4574 if (!spa_feature_is_enabled(spa, feature)) {
4575 spa_close(spa, FTAG);
4576 return (SET_ERROR(ENOTSUP));
4577 }
4578 spa_close(spa, FTAG);
4579 break;
4580 }
4581 }
4582
4583 return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
4584 }
4585
4586 /*
4587 * Checks for a race condition to make sure we don't increment a feature flag
4588 * multiple times.
4589 */
4590 static int
4591 zfs_prop_activate_feature_check(void *arg, dmu_tx_t *tx)
4592 {
4593 spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4594 spa_feature_t *featurep = arg;
4595
4596 if (!spa_feature_is_active(spa, *featurep))
4597 return (0);
4598 else
4599 return (SET_ERROR(EBUSY));
4600 }
4601
4602 /*
4603 * The callback invoked on feature activation in the sync task caused by
4604 * zfs_prop_activate_feature.
4605 */
4606 static void
4607 zfs_prop_activate_feature_sync(void *arg, dmu_tx_t *tx)
4608 {
4609 spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4610 spa_feature_t *featurep = arg;
4611
4612 spa_feature_incr(spa, *featurep, tx);
4613 }
4614
4615 /*
4616 * Activates a feature on a pool in response to a property setting. This
4617 * creates a new sync task which modifies the pool to reflect the feature
4618 * as being active.
4619 */
4620 static int
4621 zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature)
4622 {
4623 int err;
4624
4625 /* EBUSY here indicates that the feature is already active */
4626 err = dsl_sync_task(spa_name(spa),
4627 zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync,
4628 &feature, 2, ZFS_SPACE_CHECK_RESERVED);
4629
4630 if (err != 0 && err != EBUSY)
4631 return (err);
4632 else
4633 return (0);
4634 }
4635
4636 /*
4637 * Removes properties from the given props list that fail permission checks
4638 * needed to clear them and to restore them in case of a receive error. For each
4639 * property, make sure we have both set and inherit permissions.
4640 *
4641 * Returns the first error encountered if any permission checks fail. If the
4642 * caller provides a non-NULL errlist, it also gives the complete list of names
4643 * of all the properties that failed a permission check along with the
4644 * corresponding error numbers. The caller is responsible for freeing the
4645 * returned errlist.
4646 *
4647 * If every property checks out successfully, zero is returned and the list
4648 * pointed at by errlist is NULL.
4649 */
4650 static int
4651 zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
4652 {
4653 zfs_cmd_t *zc;
4654 nvpair_t *pair, *next_pair;
4655 nvlist_t *errors;
4656 int err, rv = 0;
4657
4658 if (props == NULL)
4659 return (0);
4660
4661 VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4662
4663 zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
4664 (void) strcpy(zc->zc_name, dataset);
4665 pair = nvlist_next_nvpair(props, NULL);
4666 while (pair != NULL) {
4667 next_pair = nvlist_next_nvpair(props, pair);
4668
4669 (void) strcpy(zc->zc_value, nvpair_name(pair));
4670 if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
4671 (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
4672 VERIFY(nvlist_remove_nvpair(props, pair) == 0);
4673 VERIFY(nvlist_add_int32(errors,
4674 zc->zc_value, err) == 0);
4675 }
4676 pair = next_pair;
4677 }
4678 kmem_free(zc, sizeof (zfs_cmd_t));
4679
4680 if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
4681 nvlist_free(errors);
4682 errors = NULL;
4683 } else {
4684 VERIFY(nvpair_value_int32(pair, &rv) == 0);
4685 }
4686
4687 if (errlist == NULL)
4688 nvlist_free(errors);
4689 else
4690 *errlist = errors;
4691
4692 return (rv);
4693 }
4694
4695 static boolean_t
4696 propval_equals(nvpair_t *p1, nvpair_t *p2)
4697 {
4698 if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
4699 /* dsl_prop_get_all_impl() format */
4700 nvlist_t *attrs;
4701 VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
4702 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4703 &p1) == 0);
4704 }
4705
4706 if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
4707 nvlist_t *attrs;
4708 VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
4709 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4710 &p2) == 0);
4711 }
4712
4713 if (nvpair_type(p1) != nvpair_type(p2))
4714 return (B_FALSE);
4715
4716 if (nvpair_type(p1) == DATA_TYPE_STRING) {
4717 char *valstr1, *valstr2;
4718
4719 VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
4720 VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
4721 return (strcmp(valstr1, valstr2) == 0);
4722 } else {
4723 uint64_t intval1, intval2;
4724
4725 VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
4726 VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
4727 return (intval1 == intval2);
4728 }
4729 }
4730
4731 /*
4732 * Remove properties from props if they are not going to change (as determined
4733 * by comparison with origprops). Remove them from origprops as well, since we
4734 * do not need to clear or restore properties that won't change.
4735 */
4736 static void
4737 props_reduce(nvlist_t *props, nvlist_t *origprops)
4738 {
4739 nvpair_t *pair, *next_pair;
4740
4741 if (origprops == NULL)
4742 return; /* all props need to be received */
4743
4744 pair = nvlist_next_nvpair(props, NULL);
4745 while (pair != NULL) {
4746 const char *propname = nvpair_name(pair);
4747 nvpair_t *match;
4748
4749 next_pair = nvlist_next_nvpair(props, pair);
4750
4751 if ((nvlist_lookup_nvpair(origprops, propname,
4752 &match) != 0) || !propval_equals(pair, match))
4753 goto next; /* need to set received value */
4754
4755 /* don't clear the existing received value */
4756 (void) nvlist_remove_nvpair(origprops, match);
4757 /* don't bother receiving the property */
4758 (void) nvlist_remove_nvpair(props, pair);
4759 next:
4760 pair = next_pair;
4761 }
4762 }
4763
4764 /*
4765 * Extract properties that cannot be set PRIOR to the receipt of a dataset.
4766 * For example, refquota cannot be set until after the receipt of a dataset,
4767 * because in replication streams, an older/earlier snapshot may exceed the
4768 * refquota. We want to receive the older/earlier snapshot, but setting
4769 * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
4770 * the older/earlier snapshot from being received (with EDQUOT).
4771 *
4772 * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
4773 *
 * libzfs will need to be judicious in handling errors encountered by the
 * props extracted by this function.
4776 */
4777 static nvlist_t *
4778 extract_delay_props(nvlist_t *props)
4779 {
4780 nvlist_t *delayprops;
4781 nvpair_t *nvp, *tmp;
4782 static const zfs_prop_t delayable[] = { ZFS_PROP_REFQUOTA, 0 };
4783 int i;
4784
4785 VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4786
4787 for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
4788 nvp = nvlist_next_nvpair(props, nvp)) {
4789 /*
4790 * strcmp() is safe because zfs_prop_to_name() always returns
4791 * a bounded string.
4792 */
4793 for (i = 0; delayable[i] != 0; i++) {
4794 if (strcmp(zfs_prop_to_name(delayable[i]),
4795 nvpair_name(nvp)) == 0) {
4796 break;
4797 }
4798 }
4799 if (delayable[i] != 0) {
4800 tmp = nvlist_prev_nvpair(props, nvp);
4801 VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
4802 VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
4803 nvp = tmp;
4804 }
4805 }
4806
4807 if (nvlist_empty(delayprops)) {
4808 nvlist_free(delayprops);
4809 delayprops = NULL;
4810 }
4811 return (delayprops);
4812 }
4813
4814 #ifdef DEBUG
4815 static boolean_t zfs_ioc_recv_inject_err;
4816 #endif
4817
4818 int
4819 dmu_recv_impl(int fd, char *tofs, char *tosnap, char *origin,
4820 dmu_replay_record_t *drr_begin, boolean_t is_resumable, nvlist_t *props,
4821 nvlist_t *errors, uint64_t *errf, int cfd, uint64_t *ahdl, uint64_t *sz,
4822 boolean_t force, dmu_krrp_task_t *krrp_task)
4823 {
4824 file_t *fp = getf(fd);
4825 dmu_recv_cookie_t drc;
4826 int error = 0;
4827 int props_error = 0;
4828 offset_t off;
4829 nvlist_t *origprops = NULL; /* existing properties */
4830 nvlist_t *delayprops = NULL; /* sent properties applied post-receive */
4831 boolean_t first_recvd_props = B_FALSE;
4832 nvlist_t *event;
4833 boolean_t force_cksum =
4834 !krrp_task || krrp_task->buffer_args.force_cksum;
4835
4836 ASSERT(fp || krrp_task);
4837
4838 error = dmu_recv_begin(tofs, tosnap,
4839 drr_begin, force, is_resumable, force_cksum, origin, &drc);
4840
4841 if (error != 0)
4842 goto out;
4843
4844 drc.drc_krrp_task = krrp_task;
4845 /*
4846 * Set properties before we receive the stream so that they are applied
4847 * to the new data. Note that we must call dmu_recv_stream() if
4848 * dmu_recv_begin() succeeds.
4849 */
4850 if (props != NULL && !drc.drc_newfs) {
4851 if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
4852 SPA_VERSION_RECVD_PROPS &&
4853 !dsl_prop_get_hasrecvd(tofs))
4854 first_recvd_props = B_TRUE;
4855
4856 /*
4857 * If new received properties are supplied, they are to
4858 * completely replace the existing received properties, so stash
4859 * away the existing ones.
4860 */
4861 if (dsl_prop_get_received(tofs, &origprops) == 0) {
4862 nvlist_t *errlist = NULL;
4863 /*
4864 * Don't bother writing a property if its value won't
4865 * change (and avoid the unnecessary security checks).
4866 *
4867 * The first receive after SPA_VERSION_RECVD_PROPS is a
4868 * special case where we blow away all local properties
4869 * regardless.
4870 */
4871 if (!first_recvd_props)
4872 props_reduce(props, origprops);
4873 if (zfs_check_clearable(tofs, origprops, &errlist) != 0)
4874 (void) nvlist_merge(errors, errlist, 0);
4875 nvlist_free(errlist);
4876
4877 if (clear_received_props(tofs, origprops,
4878 first_recvd_props ? NULL : props) != 0)
4879 *errf |= ZPROP_ERR_NOCLEAR;
4880 } else {
4881 *errf |= ZPROP_ERR_NOCLEAR;
4882 }
4883 }
4884
4885 if (props != NULL) {
4886 props_error = dsl_prop_set_hasrecvd(tofs);
4887
4888 if (props_error == 0) {
4889 delayprops = extract_delay_props(props);
4890 (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
4891 props, errors);
4892 }
4893 }
4894
4895 if (fp) {
4896 off = fp->f_offset;
4897 } else {
4898 off = 0;
4899 }
4900 error = dmu_recv_stream(&drc, fp ? fp->f_vnode : NULL,
4901 &off, cfd, ahdl, krrp_task);
4902
4903 if (error == 0) {
4904 zfsvfs_t *zfsvfs = NULL;
4905
4906 error = getzfsvfs(tofs, &zfsvfs);
4907 if (error == 0) {
4908 /* online recv */
4909 dsl_dataset_t *ds;
4910 int end_err;
4911
4912 ds = dmu_objset_ds(zfsvfs->z_os);
4913 error = zfs_suspend_fs(zfsvfs);
4914 /*
4915 * If the suspend fails, then the recv_end will
4916 * likely also fail, and clean up after itself.
4917 */
4918 end_err = dmu_recv_end(&drc, zfsvfs);
4919 if (error == 0)
4920 error = zfs_resume_fs(zfsvfs, ds);
4921 error = error ? error : end_err;
4922 VFS_RELE(zfsvfs->z_vfs);
4923 } else {
4924 error = dmu_recv_end(&drc, NULL);
4925 }
4926
4927 /* Set delayed properties now, after we're done receiving. */
4928 if (delayprops != NULL && error == 0) {
4929 (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
4930 delayprops, errors);
4931 }
4932 }
4933
4934 if (delayprops != NULL) {
4935 /*
4936 * Merge delayed props back in with initial props, in case
4937 * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
4938 * we have to make sure clear_received_props() includes
4939 * the delayed properties).
4940 *
4941 * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
4942 * using ASSERT() will be just like a VERIFY.
4943 */
4944 ASSERT(nvlist_merge(props, delayprops, 0) == 0);
4945 nvlist_free(delayprops);
4946 }
4947
4948 if (fp) {
4949 *sz = off - fp->f_offset;
4950 if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
4951 fp->f_offset = off;
4952 } else {
4953 *sz = off;
4954 }
4955 if (error == 0) {
4956 char val[MAXNAMELEN];
4957
4958 (void) strcpy(val, tofs);
4959 (void) strcat(val, "@");
4960 (void) strcat(val, tosnap);
4961
4962 event = fnvlist_alloc();
4963 if (props != NULL)
4964 fnvlist_add_nvlist(event, "props", props);
4965 fnvlist_add_string(event, "origin", tofs);
4966 fnvlist_add_string(event, "tosnap", val);
4967 fnvlist_add_uint64(event, "bytes", *sz);
4968 fnvlist_add_boolean_value(event, "newds", drc.drc_newfs);
4969 zfs_event_post(ZFS_EC_STATUS, "recv", event);
4970 }
4971
4972 #ifdef DEBUG
4973 if (zfs_ioc_recv_inject_err) {
4974 zfs_ioc_recv_inject_err = B_FALSE;
4975 error = 1;
4976 }
4977 #endif
4978 /*
4979 * On error, restore the original props.
4980 */
4981 if (error != 0 && props != NULL && !drc.drc_newfs) {
4982 if (clear_received_props(tofs, props, NULL) != 0) {
4983 /*
4984 * We failed to clear the received properties.
4985 * Since we may have left a $recvd value on the
4986 * system, we can't clear the $hasrecvd flag.
4987 */
4988 *errf |= ZPROP_ERR_NORESTORE;
4989 } else if (first_recvd_props) {
4990 dsl_prop_unset_hasrecvd(tofs);
4991 }
4992
4993 if (origprops == NULL && !drc.drc_newfs) {
4994 /* We failed to stash the original properties. */
4995 *errf |= ZPROP_ERR_NORESTORE;
4996 }
4997
4998 /*
4999 * dsl_props_set() will not convert RECEIVED to LOCAL on or
5000 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
		 * explicitly if we're restoring local properties cleared in
		 * the first new-style receive.
5003 */
5004 if (origprops != NULL &&
5005 zfs_set_prop_nvlist(tofs, (first_recvd_props ?
5006 ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
5007 origprops, NULL) != 0) {
5008 /*
5009 * We stashed the original properties but failed to
5010 * restore them.
5011 */
5012 *errf |= ZPROP_ERR_NORESTORE;
5013 }
5014 }
5015 out:
5016 nvlist_free(origprops);
5017 if (fp)
5018 releasef(fd);
5019
5020 if (error == 0)
5021 error = props_error;
5022
5023 return (error);
5024 }
5025
5026 /*
5027 * inputs:
5028 * zc_name name of containing filesystem
5029 * zc_nvlist_src{_size} nvlist of properties to apply
5030 * zc_value name of snapshot to create
5031 * zc_string name of clone origin (if DRR_FLAG_CLONE)
5032 * zc_cookie file descriptor to recv from
5033 * zc_begin_record the BEGIN record of the stream (not byteswapped)
5034 * zc_guid force flag
5035 * zc_cleanup_fd cleanup-on-exit file descriptor
5036 * zc_action_handle handle for this guid/ds mapping (or zero on first call)
5037 * zc_resumable if data is incomplete assume sender will resume
5038 *
5039 * outputs:
5040 * zc_cookie number of bytes read
5041 * zc_nvlist_dst{_size} error for each unapplied received property
5042 * zc_obj zprop_errflags_t
5043 * zc_action_handle handle for this guid/ds mapping
5044 */
5045 static int
5046 zfs_ioc_recv(zfs_cmd_t *zc)
5047 {
5048 int fd = zc->zc_cookie;
5049 char tofs[ZFS_MAX_DATASET_NAME_LEN];
5050 char *tosnap;
5051 char *origin = NULL;
5052 nvlist_t *errors;
5053 nvlist_t *props = NULL; /* sent properties */
5054 boolean_t force = (boolean_t)zc->zc_guid;
5055 int err;
5056
5057 if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
5058 strchr(zc->zc_value, '@') == NULL ||
5059 strchr(zc->zc_value, '%'))
5060 return (SET_ERROR(EINVAL));
5061
5062 (void) strcpy(tofs, zc->zc_value);
5063 tosnap = strchr(tofs, '@');
5064 *tosnap++ = '\0';
5065
5066 if (zc->zc_string[0])
5067 origin = zc->zc_string;
5068
5069 if (zc->zc_nvlist_src != NULL &&
5070 (err = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
5071 zc->zc_iflags, &props)) != 0)
5072 return (err);
5073
5074 errors = fnvlist_alloc();
5075
5076 err = dmu_recv_impl(fd, tofs, tosnap, origin,
5077 &zc->zc_begin_record, zc->zc_resumable, props, errors, &zc->zc_obj,
5078 zc->zc_cleanup_fd, &zc->zc_action_handle, &zc->zc_cookie,
5079 force, NULL);
5080
5081 /*
5082 * Now that all props, initial and delayed, are set, report the prop
5083 * errors to the caller.
5084 */
5085 if (zc->zc_nvlist_dst_size != 0 &&
5086 (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
5087 put_nvlist(zc, errors) != 0)) {
5088 /*
5089 * Caller made zc->zc_nvlist_dst less than the minimum expected
5090 * size or supplied an invalid address.
5091 */
5092 err = SET_ERROR(EINVAL);
5093 }
5094
5095 nvlist_free(errors);
5096 nvlist_free(props);
	return (err);
}
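
/*
 * Illustrative sketch (not compiled, values hypothetical) of the fields a
 * legacy caller fills in before issuing this ioctl against /dev/zfs: the
 * snapshot to create, the descriptor to read the stream from, the BEGIN
 * record it already pulled off that stream, and the force flag.
 *
 *	zfs_cmd_t zc = { 0 };
 *
 *	(void) strlcpy(zc.zc_value, "pool/fs@backup", sizeof (zc.zc_value));
 *	zc.zc_cookie = stream_fd;
 *	zc.zc_begin_record = drr_begin;
 *	zc.zc_guid = 1;
 *	zc.zc_cleanup_fd = cleanup_fd;
 */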
5100
5101 /*
5102 * inputs:
5103 * zc_name name of snapshot to send
5104 * zc_cookie file descriptor to send stream to
5105 * zc_obj fromorigin flag (mutually exclusive with zc_fromobj)
5106 * zc_sendobj objsetid of snapshot to send
5107 * zc_fromobj objsetid of incremental fromsnap (may be zero)
5108 * zc_guid if set, estimate size of stream only. zc_cookie is ignored.
5109 * output size in zc_objset_type.
5110 * zc_flags lzc_send_flags
5111 *
5112 * outputs:
5113 * zc_objset_type estimated size, if zc_guid is set
5114 */
5115 static int
5116 zfs_ioc_send(zfs_cmd_t *zc)
5117 {
5118 int error;
5119 offset_t off;
5120 boolean_t estimate = (zc->zc_guid != 0);
5121 boolean_t embedok = (zc->zc_flags & 0x1);
5122 boolean_t large_block_ok = (zc->zc_flags & 0x2);
5123 boolean_t compressok = (zc->zc_flags & 0x4);
5124
5125 if (zc->zc_obj != 0) {
5126 dsl_pool_t *dp;
5127 dsl_dataset_t *tosnap;
5128
5129 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5130 if (error != 0)
5131 return (error);
5132
5133 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
5134 if (error != 0) {
5135 dsl_pool_rele(dp, FTAG);
5136 return (error);
5137 }
5138
5139 if (dsl_dir_is_clone(tosnap->ds_dir))
5140 zc->zc_fromobj =
5141 dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
5142 dsl_dataset_rele(tosnap, FTAG);
5143 dsl_pool_rele(dp, FTAG);
5144 }
5145
5146 if (estimate) {
5147 dsl_pool_t *dp;
5148 dsl_dataset_t *tosnap;
5149 dsl_dataset_t *fromsnap = NULL;
5150
5151 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5152 if (error != 0)
5153 return (error);
5154
5155 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
5156 if (error != 0) {
5157 dsl_pool_rele(dp, FTAG);
5158 return (error);
5159 }
5160
5161 if (zc->zc_fromobj != 0) {
5162 error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
5163 FTAG, &fromsnap);
5164 if (error != 0) {
5165 dsl_dataset_rele(tosnap, FTAG);
5166 dsl_pool_rele(dp, FTAG);
5167 return (error);
5168 }
5169 }
5170
5171 error = dmu_send_estimate(tosnap, fromsnap, compressok,
5172 &zc->zc_objset_type);
5173
5174 if (fromsnap != NULL)
5175 dsl_dataset_rele(fromsnap, FTAG);
5176 dsl_dataset_rele(tosnap, FTAG);
5177 dsl_pool_rele(dp, FTAG);
5178 } else {
5179 offset_t off_starting;
5180 file_t *fp = getf(zc->zc_cookie);
5181 if (fp == NULL)
5182 return (SET_ERROR(EBADF));
5183
5184 off_starting = off = fp->f_offset;
5185 error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
5186 zc->zc_fromobj, embedok, large_block_ok, compressok,
5187 zc->zc_cookie, fp->f_vnode, &off, zc->zc_sendsize);
5188
5189 zc->zc_sendcounter = off - off_starting;
5190 if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5191 fp->f_offset = off;
5192 releasef(zc->zc_cookie);
5193 }
5194 return (error);
5195 }
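
/*
 * Illustrative sketch (not compiled, values hypothetical): requesting a
 * size estimate instead of an actual stream.  With zc_guid set, zc_cookie
 * is ignored and the estimate is returned in zc_objset_type; zc_flags bit
 * 0x4 asks for a compressed-size estimate, matching the handler above.
 *
 *	zfs_cmd_t zc = { 0 };
 *
 *	(void) strlcpy(zc.zc_name, "pool/fs@snap", sizeof (zc.zc_name));
 *	zc.zc_sendobj = tosnap_objsetid;
 *	zc.zc_fromobj = fromsnap_objsetid;
 *	zc.zc_guid = 1;
 *	zc.zc_flags = 0x4;
 */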
5196
5197 /*
5198 * inputs:
5199 * zc_name name of snapshot on which to report progress
5200 * zc_cookie file descriptor of send stream
5201 *
5202 * outputs:
5203 * zc_cookie number of bytes written in send stream thus far
5204 */
5205 static int
5206 zfs_ioc_send_progress(zfs_cmd_t *zc)
5207 {
5208 dsl_pool_t *dp;
5209 dsl_dataset_t *ds;
5210 dmu_sendarg_t *dsp = NULL;
5211 int error;
5212
5213 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5214 if (error != 0)
5215 return (error);
5216
5217 error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
5218 if (error != 0) {
5219 dsl_pool_rele(dp, FTAG);
5220 return (error);
5221 }
5222
5223 mutex_enter(&ds->ds_sendstream_lock);
5224
5225 /*
5226 * Iterate over all the send streams currently active on this dataset.
5227 * If there's one which matches the specified file descriptor _and_ the
5228 * stream was started by the current process, return the progress of
5229 * that stream.
5230 */
5231 for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
5232 dsp = list_next(&ds->ds_sendstreams, dsp)) {
5233 if (dsp->dsa_outfd == zc->zc_cookie &&
5234 dsp->dsa_proc == curproc)
5235 break;
5236 }
5237
5238 if (dsp != NULL)
5239 zc->zc_cookie = *(dsp->dsa_off);
5240 else
5241 error = SET_ERROR(ENOENT);
5242
5243 mutex_exit(&ds->ds_sendstream_lock);
5244 dsl_dataset_rele(ds, FTAG);
5245 dsl_pool_rele(dp, FTAG);
5246 return (error);
5247 }
5248
5249 static int
5250 zfs_ioc_inject_fault(zfs_cmd_t *zc)
5251 {
5252 int id, error;
5253
5254 error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
5255 &zc->zc_inject_record);
5256
5257 if (error == 0)
5258 zc->zc_guid = (uint64_t)id;
5259
5260 return (error);
5261 }
5262
5263 static int
5264 zfs_ioc_clear_fault(zfs_cmd_t *zc)
5265 {
5266 return (zio_clear_fault((int)zc->zc_guid));
5267 }
5268
5269 static int
5270 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
5271 {
5272 int id = (int)zc->zc_guid;
5273 int error;
5274
5275 error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
5276 &zc->zc_inject_record);
5277
5278 zc->zc_guid = id;
5279
5280 return (error);
5281 }
5282
5283 static int
5284 zfs_ioc_error_log(zfs_cmd_t *zc)
5285 {
5286 spa_t *spa;
5287 int error;
5288 size_t count = (size_t)zc->zc_nvlist_dst_size;
5289
5290 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
5291 return (error);
5292
5293 error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
5294 &count);
5295 if (error == 0)
5296 zc->zc_nvlist_dst_size = count;
5297 else
5298 zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
5299
5300 spa_close(spa, FTAG);
5301
5302 return (error);
5303 }
5304
5305 static int
5306 zfs_ioc_clear(zfs_cmd_t *zc)
5307 {
5308 spa_t *spa;
5309 vdev_t *vd;
5310 int error;
5311
5312 /*
5313 * On zpool clear we also fix up missing slogs
5314 */
5315 mutex_enter(&spa_namespace_lock);
5316 spa = spa_lookup(zc->zc_name);
5317 if (spa == NULL) {
5318 mutex_exit(&spa_namespace_lock);
5319 return (SET_ERROR(EIO));
5320 }
5321 if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
5322 /* we need to let spa_open/spa_load clear the chains */
5323 spa_set_log_state(spa, SPA_LOG_CLEAR);
5324 }
5325 spa->spa_last_open_failed = 0;
5326 mutex_exit(&spa_namespace_lock);
5327
5328 if (zc->zc_cookie & ZPOOL_NO_REWIND) {
5329 error = spa_open(zc->zc_name, &spa, FTAG);
5330 } else {
5331 nvlist_t *policy;
5332 nvlist_t *config = NULL;
5333
5334 if (zc->zc_nvlist_src == NULL)
5335 return (SET_ERROR(EINVAL));
5336
5337 if ((error = get_nvlist(zc->zc_nvlist_src,
5338 zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
5339 error = spa_open_rewind(zc->zc_name, &spa, FTAG,
5340 policy, &config);
5341 if (config != NULL) {
5342 int err;
5343
5344 if ((err = put_nvlist(zc, config)) != 0)
5345 error = err;
5346 nvlist_free(config);
5347 }
5348 nvlist_free(policy);
5349 }
5350 }
5351
5352 if (error != 0)
5353 return (error);
5354
5355 spa_vdev_state_enter(spa, SCL_NONE);
5356
5357 if (zc->zc_guid == 0) {
5358 vd = NULL;
5359 } else {
5360 vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
5361 if (vd == NULL) {
5362 (void) spa_vdev_state_exit(spa, NULL, ENODEV);
5363 spa_close(spa, FTAG);
5364 return (SET_ERROR(ENODEV));
5365 }
5366 }
5367
5368 vdev_clear(spa, vd);
5369
5370 (void) spa_vdev_state_exit(spa, NULL, 0);
5371
5372 /*
5373 * Resume any suspended I/Os.
5374 */
5375 if (zio_resume(spa) != 0)
5376 error = SET_ERROR(EIO);
5377
5378 spa_close(spa, FTAG);
5379
5380 return (error);
5381 }
5382
5383 static int
5384 zfs_ioc_pool_reopen(zfs_cmd_t *zc)
5385 {
5386 spa_t *spa;
5387 int error;
5388
5389 error = spa_open(zc->zc_name, &spa, FTAG);
5390 if (error != 0)
5391 return (error);
5392
5393 spa_vdev_state_enter(spa, SCL_NONE);
5394
5395 /*
5396 * If a resilver is already in progress then set the
5397 * spa_scrub_reopen flag to B_TRUE so that we don't restart
5398 * the scan as a side effect of the reopen. Otherwise, let
	 * vdev_open() decide if a resilver is required.
5400 */
5401 spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool);
5402 vdev_reopen(spa->spa_root_vdev);
5403 spa->spa_scrub_reopen = B_FALSE;
5404
5405 (void) spa_vdev_state_exit(spa, NULL, 0);
5406 spa_close(spa, FTAG);
5407 return (0);
}

/*
5410 * inputs:
5411 * zc_name name of filesystem
5412 *
5413 * outputs:
5414 * zc_string name of conflicting snapshot, if there is one
5415 */
5416 static int
5417 zfs_ioc_promote(zfs_cmd_t *zc)
5418 {
5419 dsl_pool_t *dp;
5420 dsl_dataset_t *ds, *ods;
5421 char origin[ZFS_MAX_DATASET_NAME_LEN];
5422 char *cp;
5423 int error;
5424 nvlist_t *event;
5425
5426 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5427 if (error != 0)
5428 return (error);
5429
5430 error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
5431 if (error != 0) {
5432 dsl_pool_rele(dp, FTAG);
5433 return (error);
5434 }
5435
5436 if (!dsl_dir_is_clone(ds->ds_dir)) {
5437 dsl_dataset_rele(ds, FTAG);
5438 dsl_pool_rele(dp, FTAG);
5439 return (SET_ERROR(EINVAL));
5440 }
5441
5442 error = dsl_dataset_hold_obj(dp,
5443 dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &ods);
5444 if (error != 0) {
5445 dsl_dataset_rele(ds, FTAG);
5446 dsl_pool_rele(dp, FTAG);
5447 return (error);
5448 }
5449
5450 dsl_dataset_name(ods, origin);
5451 dsl_dataset_rele(ods, FTAG);
5452 dsl_dataset_rele(ds, FTAG);
5453 dsl_pool_rele(dp, FTAG);
5454
5455 /*
5456 * We don't need to unmount *all* the origin fs's snapshots, but
5457 * it's easier.
5458 */
5459 cp = strchr(origin, '@');
5460 if (cp)
5461 *cp = '\0';
5462 (void) dmu_objset_find(origin,
5463 zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
5464 error = dsl_dataset_promote(zc->zc_name, zc->zc_string);
5465
5466 if (error == 0) {
5467 event = fnvlist_alloc();
5468 fnvlist_add_string(event, "fsname", zc->zc_name);
5469 fnvlist_add_string(event, "origin", zc->zc_value);
5470 zfs_event_post(ZFS_EC_STATUS, "promote", event);
5471 }
5472
5473 return (error);
5474 }
5475
5476 /*
5477 * Retrieve a single {user|group}{used|quota}@... property.
5478 *
5479 * inputs:
5480 * zc_name name of filesystem
5481 * zc_objset_type zfs_userquota_prop_t
 * zc_value		domain name (e.g. "S-1-234-567-89")
5483 * zc_guid RID/UID/GID
5484 *
5485 * outputs:
5486 * zc_cookie property value
5487 */
5488 static int
5489 zfs_ioc_userspace_one(zfs_cmd_t *zc)
5490 {
5491 zfsvfs_t *zfsvfs;
5492 int error;
5493
5494 if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
5495 return (SET_ERROR(EINVAL));
5496
5497 error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5498 if (error != 0)
5499 return (error);
5500
5501 error = zfs_userspace_one(zfsvfs,
5502 zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
5503 zfsvfs_rele(zfsvfs, FTAG);
5504
5505 return (error);
5506 }
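
/*
 * Illustrative sketch (not compiled, values hypothetical): querying the
 * space charged to one POSIX uid.  For POSIX ids the domain string in
 * zc_value is left empty; zc_guid carries the uid and the result comes
 * back in zc_cookie.
 *
 *	zfs_cmd_t zc = { 0 };
 *
 *	(void) strlcpy(zc.zc_name, "pool/fs", sizeof (zc.zc_name));
 *	zc.zc_objset_type = ZFS_PROP_USERUSED;
 *	zc.zc_value[0] = '\0';
 *	zc.zc_guid = 1001;
 */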
5507
5508 /*
5509 * inputs:
5510 * zc_name name of filesystem
5511 * zc_cookie zap cursor
5512 * zc_objset_type zfs_userquota_prop_t
5513 * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
5514 *
5515 * outputs:
5516 * zc_nvlist_dst[_size] data buffer (array of zfs_useracct_t)
5517 * zc_cookie zap cursor
5518 */
5519 static int
5520 zfs_ioc_userspace_many(zfs_cmd_t *zc)
5521 {
5522 zfsvfs_t *zfsvfs;
5523 int bufsize = zc->zc_nvlist_dst_size;
5524
5525 if (bufsize <= 0)
5526 return (SET_ERROR(ENOMEM));
5527
5528 int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5529 if (error != 0)
5530 return (error);
5531
5532 void *buf = kmem_alloc(bufsize, KM_SLEEP);
5533
5534 error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
5535 buf, &zc->zc_nvlist_dst_size);
5536
5537 if (error == 0) {
5538 error = xcopyout(buf,
5539 (void *)(uintptr_t)zc->zc_nvlist_dst,
5540 zc->zc_nvlist_dst_size);
5541 }
5542 kmem_free(buf, bufsize);
5543 zfsvfs_rele(zfsvfs, FTAG);
5544
5545 return (error);
5546 }
5547
5548 /*
5549 * inputs:
5550 * zc_name name of filesystem
5551 *
5552 * outputs:
5553 * none
5554 */
5555 static int
5556 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
5557 {
5558 objset_t *os;
5559 int error = 0;
5560 zfsvfs_t *zfsvfs;
5561
5562 if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
5563 if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
5564 /*
5565 * If userused is not enabled, it may be because the
5566 * objset needs to be closed & reopened (to grow the
			 * objset_phys_t). Suspending and resuming the fs
			 * will do that.
5568 */
5569 dsl_dataset_t *ds;
5570
5571 ds = dmu_objset_ds(zfsvfs->z_os);
5572 error = zfs_suspend_fs(zfsvfs);
5573 if (error == 0) {
5574 dmu_objset_refresh_ownership(zfsvfs->z_os,
5575 zfsvfs);
5576 error = zfs_resume_fs(zfsvfs, ds);
5577 }
5578 }
5579 if (error == 0)
5580 error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
5581 VFS_RELE(zfsvfs->z_vfs);
5582 } else {
5583 /* XXX kind of reading contents without owning */
5584 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5585 if (error != 0)
5586 return (error);
5587
5588 error = dmu_objset_userspace_upgrade(os);
5589 dmu_objset_rele(os, FTAG);
5590 }
5591
5592 return (error);
5593 }
5594
5595 /*
 * We don't want a hard dependency on symbols in sharefs, nfs, and
 * smbsrv, so we resolve them only when the first file system is
 * shared.  None of sharefs, nfs, or smbsrv is an unloadable module.
5601 */
5602 int (*znfsexport_fs)(void *arg);
5603 int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
5604 int (*zsmbexport_fs)(void *arg, boolean_t add_share);
5605
5606 int zfs_nfsshare_inited;
5607 int zfs_smbshare_inited;
5608
5609 ddi_modhandle_t nfs_mod;
5610 ddi_modhandle_t sharefs_mod;
5611 ddi_modhandle_t smbsrv_mod;
5612 kmutex_t zfs_share_lock;
5613
5614 static int
5615 zfs_init_sharefs()
5616 {
5617 int error;
5618
5619 ASSERT(MUTEX_HELD(&zfs_share_lock));
5620 /* Both NFS and SMB shares also require sharetab support. */
5621 if (sharefs_mod == NULL && ((sharefs_mod =
5622 ddi_modopen("fs/sharefs",
5623 KRTLD_MODE_FIRST, &error)) == NULL)) {
5624 return (SET_ERROR(ENOSYS));
5625 }
5626 if (zshare_fs == NULL && ((zshare_fs =
5627 (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
5628 ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
5629 return (SET_ERROR(ENOSYS));
5630 }
5631 return (0);
5632 }
5633
5634 static int
5635 zfs_ioc_share(zfs_cmd_t *zc)
5636 {
5637 int error;
5638 int opcode;
5639
5640 switch (zc->zc_share.z_sharetype) {
5641 case ZFS_SHARE_NFS:
5642 case ZFS_UNSHARE_NFS:
5643 if (zfs_nfsshare_inited == 0) {
5644 mutex_enter(&zfs_share_lock);
5645 if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
5646 KRTLD_MODE_FIRST, &error)) == NULL)) {
5647 mutex_exit(&zfs_share_lock);
5648 return (SET_ERROR(ENOSYS));
5649 }
5650 if (znfsexport_fs == NULL &&
5651 ((znfsexport_fs = (int (*)(void *))
5652 ddi_modsym(nfs_mod,
5653 "nfs_export", &error)) == NULL)) {
5654 mutex_exit(&zfs_share_lock);
5655 return (SET_ERROR(ENOSYS));
5656 }
5657 error = zfs_init_sharefs();
5658 if (error != 0) {
5659 mutex_exit(&zfs_share_lock);
5660 return (SET_ERROR(ENOSYS));
5661 }
5662 zfs_nfsshare_inited = 1;
5663 mutex_exit(&zfs_share_lock);
5664 }
5665 break;
5666 case ZFS_SHARE_SMB:
5667 case ZFS_UNSHARE_SMB:
5668 if (zfs_smbshare_inited == 0) {
5669 mutex_enter(&zfs_share_lock);
5670 if (smbsrv_mod == NULL && ((smbsrv_mod =
5671 ddi_modopen("drv/smbsrv",
5672 KRTLD_MODE_FIRST, &error)) == NULL)) {
5673 mutex_exit(&zfs_share_lock);
5674 return (SET_ERROR(ENOSYS));
5675 }
5676 if (zsmbexport_fs == NULL && ((zsmbexport_fs =
5677 (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
5678 "smb_server_share", &error)) == NULL)) {
5679 mutex_exit(&zfs_share_lock);
5680 return (SET_ERROR(ENOSYS));
5681 }
5682 error = zfs_init_sharefs();
5683 if (error != 0) {
5684 mutex_exit(&zfs_share_lock);
5685 return (SET_ERROR(ENOSYS));
5686 }
5687 zfs_smbshare_inited = 1;
5688 mutex_exit(&zfs_share_lock);
5689 }
5690 break;
5691 default:
5692 return (SET_ERROR(EINVAL));
5693 }
5694
5695 switch (zc->zc_share.z_sharetype) {
5696 case ZFS_SHARE_NFS:
5697 case ZFS_UNSHARE_NFS:
5698 if (error =
5699 znfsexport_fs((void *)
5700 (uintptr_t)zc->zc_share.z_exportdata))
5701 return (error);
5702 break;
5703 case ZFS_SHARE_SMB:
5704 case ZFS_UNSHARE_SMB:
5705 if (error = zsmbexport_fs((void *)
5706 (uintptr_t)zc->zc_share.z_exportdata,
5707 zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
5708 B_TRUE: B_FALSE)) {
5709 return (error);
5710 }
5711 break;
5712 }
5713
5714 opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
5715 zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
5716 SHAREFS_ADD : SHAREFS_REMOVE;
5717
5718 /*
5719 * Add or remove share from sharetab
5720 */
5721 error = zshare_fs(opcode,
5722 (void *)(uintptr_t)zc->zc_share.z_sharedata,
5723 zc->zc_share.z_sharemax);
5724
	return (error);
}
5728
5729 ace_t full_access[] = {
5730 {(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
5731 };
5732
5733 /*
5734 * inputs:
5735 * zc_name name of containing filesystem
5736 * zc_obj object # beyond which we want next in-use object #
5737 *
5738 * outputs:
5739 * zc_obj next in-use object #
5740 */
5741 static int
5742 zfs_ioc_next_obj(zfs_cmd_t *zc)
5743 {
5744 objset_t *os = NULL;
5745 int error;
5746
5747 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5748 if (error != 0)
5749 return (error);
5750
5751 error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
5752 dsl_dataset_phys(os->os_dsl_dataset)->ds_prev_snap_txg);
5753
5754 dmu_objset_rele(os, FTAG);
5755 return (error);
5756 }
5757
5758 /*
5759 * inputs:
5760 * zc_name name of filesystem
5761 * zc_value prefix name for snapshot
5762 * zc_cleanup_fd cleanup-on-exit file descriptor for calling process
5763 *
5764 * outputs:
5765 * zc_value short name of new snapshot
5766 */
5767 static int
5768 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
5769 {
5770 char *snap_name;
5771 char *hold_name;
5772 int error;
5773 minor_t minor;
5774
5775 error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
5776 if (error != 0)
5777 return (error);
5778
5779 snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
5780 (u_longlong_t)ddi_get_lbolt64());
5781 hold_name = kmem_asprintf("%%%s", zc->zc_value);
5782
5783 error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
5784 hold_name);
5785 if (error == 0)
5786 (void) strcpy(zc->zc_value, snap_name);
5787 strfree(snap_name);
5788 strfree(hold_name);
5789 zfs_onexit_fd_rele(zc->zc_cleanup_fd);
5790 return (error);
5791 }
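
/*
 * Illustrative sketch (not compiled, names hypothetical): the caller hands
 * in a prefix and a cleanup descriptor; the handler above generates a
 * unique snapshot name from the prefix, holds it against the descriptor so
 * that it is cleaned up when that descriptor is closed, and returns the
 * short name in zc_value.
 *
 *	zfs_cmd_t zc = { 0 };
 *
 *	(void) strlcpy(zc.zc_name, "pool/fs", sizeof (zc.zc_name));
 *	(void) strlcpy(zc.zc_value, "zdiff", sizeof (zc.zc_value));
 *	zc.zc_cleanup_fd = cleanup_fd;
 */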
5792
5793 /*
5794 * inputs:
5795 * zc_name name of "to" snapshot
5796 * zc_value name of "from" snapshot
5797 * zc_cookie file descriptor to write diff data on
5798 *
5799 * outputs:
5800 * dmu_diff_record_t's to the file descriptor
5801 */
5802 static int
5803 zfs_ioc_diff(zfs_cmd_t *zc)
5804 {
5805 file_t *fp;
5806 offset_t off;
5807 int error;
5808
5809 fp = getf(zc->zc_cookie);
5810 if (fp == NULL)
5811 return (SET_ERROR(EBADF));
5812
5813 off = fp->f_offset;
5814
5815 error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off);
5816
5817 if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5818 fp->f_offset = off;
5819 releasef(zc->zc_cookie);
5820
5821 return (error);
5822 }
5823
5824 /*
5825 * Remove all ACL files in shares dir
5826 */
5827 static int
5828 zfs_smb_acl_purge(znode_t *dzp)
5829 {
5830 zap_cursor_t zc;
5831 zap_attribute_t zap;
5832 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
5833 int error;
5834
5835 for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
5836 (error = zap_cursor_retrieve(&zc, &zap)) == 0;
5837 zap_cursor_advance(&zc)) {
5838 if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
5839 NULL, 0)) != 0)
5840 break;
5841 }
5842 zap_cursor_fini(&zc);
5843 return (error);
5844 }
5845
5846 static int
5847 zfs_ioc_smb_acl(zfs_cmd_t *zc)
5848 {
5849 vnode_t *vp;
5850 znode_t *dzp;
5851 vnode_t *resourcevp = NULL;
5852 znode_t *sharedir;
5853 zfsvfs_t *zfsvfs;
5854 nvlist_t *nvlist;
5855 char *src, *target;
5856 vattr_t vattr;
5857 vsecattr_t vsec;
5858 int error = 0;
5859
5860 if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
5861 NO_FOLLOW, NULL, &vp)) != 0)
5862 return (error);
5863
5864 /* Now make sure mntpnt and dataset are ZFS */
5865
5866 if (vp->v_vfsp->vfs_fstype != zfsfstype ||
5867 (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
5868 zc->zc_name) != 0)) {
5869 VN_RELE(vp);
5870 return (SET_ERROR(EINVAL));
5871 }
5872
5873 dzp = VTOZ(vp);
5874 zfsvfs = dzp->z_zfsvfs;
5875 ZFS_ENTER(zfsvfs);
5876
5877 /*
	 * Create the share dir if it's missing.
5879 */
5880 mutex_enter(&zfsvfs->z_lock);
5881 if (zfsvfs->z_shares_dir == 0) {
5882 dmu_tx_t *tx;
5883
5884 tx = dmu_tx_create(zfsvfs->z_os);
5885 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
5886 ZFS_SHARES_DIR);
5887 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
5888 error = dmu_tx_assign(tx, TXG_WAIT);
5889 if (error != 0) {
5890 dmu_tx_abort(tx);
5891 } else {
5892 error = zfs_create_share_dir(zfsvfs, tx);
5893 dmu_tx_commit(tx);
5894 }
5895 if (error != 0) {
5896 mutex_exit(&zfsvfs->z_lock);
5897 VN_RELE(vp);
5898 ZFS_EXIT(zfsvfs);
5899 return (error);
5900 }
5901 }
5902 mutex_exit(&zfsvfs->z_lock);
5903
5904 ASSERT(zfsvfs->z_shares_dir);
5905 if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
5906 VN_RELE(vp);
5907 ZFS_EXIT(zfsvfs);
5908 return (error);
5909 }
5910
5911 switch (zc->zc_cookie) {
5912 case ZFS_SMB_ACL_ADD:
5913 vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
5914 vattr.va_type = VREG;
5915 vattr.va_mode = S_IFREG|0777;
5916 vattr.va_uid = 0;
5917 vattr.va_gid = 0;
5918
5919 vsec.vsa_mask = VSA_ACE;
5920 vsec.vsa_aclentp = &full_access;
5921 vsec.vsa_aclentsz = sizeof (full_access);
5922 vsec.vsa_aclcnt = 1;
5923
5924 error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
5925 &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
5926 if (resourcevp)
5927 VN_RELE(resourcevp);
5928 break;
5929
5930 case ZFS_SMB_ACL_REMOVE:
5931 error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
5932 NULL, 0);
5933 break;
5934
5935 case ZFS_SMB_ACL_RENAME:
5936 if ((error = get_nvlist(zc->zc_nvlist_src,
5937 zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
5938 VN_RELE(vp);
5939 VN_RELE(ZTOV(sharedir));
5940 ZFS_EXIT(zfsvfs);
5941 return (error);
5942 }
5943 if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
5944 nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
5945 &target)) {
5946 VN_RELE(vp);
5947 VN_RELE(ZTOV(sharedir));
5948 ZFS_EXIT(zfsvfs);
5949 nvlist_free(nvlist);
			return (SET_ERROR(EINVAL));
5951 }
5952 error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
5953 kcred, NULL, 0);
5954 nvlist_free(nvlist);
5955 break;
5956
5957 case ZFS_SMB_ACL_PURGE:
5958 error = zfs_smb_acl_purge(sharedir);
5959 break;
5960
5961 default:
5962 error = SET_ERROR(EINVAL);
5963 break;
5964 }
5965
5966 VN_RELE(vp);
5967 VN_RELE(ZTOV(sharedir));
5968
5969 ZFS_EXIT(zfsvfs);
5970
5971 return (error);
5972 }
5973
5974 /*
5975 * innvl: {
5976 * "holds" -> { snapname -> holdname (string), ... }
5977 * (optional) "cleanup_fd" -> fd (int32)
5978 * }
5979 *
5980 * outnvl: {
5981 * snapname -> error value (int32)
5982 * ...
5983 * }
5984 */
5985 /* ARGSUSED */
5986 static int
5987 zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
5988 {
5989 nvpair_t *pair;
5990 nvlist_t *holds;
5991 int cleanup_fd = -1;
5992 int error;
5993 minor_t minor = 0;
5994
5995 error = nvlist_lookup_nvlist(args, "holds", &holds);
5996 if (error != 0)
5997 return (SET_ERROR(EINVAL));
5998
5999 /* make sure the user didn't pass us any invalid (empty) tags */
6000 for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
6001 pair = nvlist_next_nvpair(holds, pair)) {
6002 char *htag;
6003
6004 error = nvpair_value_string(pair, &htag);
6005 if (error != 0)
6006 return (SET_ERROR(error));
6007
6008 if (strlen(htag) == 0)
6009 return (SET_ERROR(EINVAL));
6010 }
6011
6012 if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
6013 error = zfs_onexit_fd_hold(cleanup_fd, &minor);
6014 if (error != 0)
6015 return (error);
6016 }
6017
6018 error = dsl_dataset_user_hold(holds, minor, errlist);
6019 if (minor != 0)
6020 zfs_onexit_fd_rele(cleanup_fd);
6021 return (error);
6022 }
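
/*
 * Illustrative sketch (not compiled), assuming the libzfs_core wrapper
 * lzc_hold() of the same vintage; snapshot names and tags are
 * hypothetical.  The holds nvlist maps full snapshot names to hold tags,
 * and a cleanup descriptor makes the holds go away when it is closed.
 *
 *	nvlist_t *holds = fnvlist_alloc();
 *	nvlist_t *errlist = NULL;
 *	int error;
 *
 *	fnvlist_add_string(holds, "pool/fs@snap", "backup-job");
 *	error = lzc_hold(holds, cleanup_fd, &errlist);
 *	fnvlist_free(holds);
 */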
6023
6024 /*
6025 * innvl is not used.
6026 *
6027 * outnvl: {
6028 * holdname -> time added (uint64 seconds since epoch)
6029 * ...
6030 * }
6031 */
6032 /* ARGSUSED */
6033 static int
6034 zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
6035 {
6036 return (dsl_dataset_get_holds(snapname, outnvl));
6037 }
6038
6039 /*
6040 * innvl: {
6041 * snapname -> { holdname, ... }
6042 * ...
6043 * }
6044 *
6045 * outnvl: {
6046 * snapname -> error value (int32)
6047 * ...
6048 * }
6049 */
6050 /* ARGSUSED */
6051 static int
6052 zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
6053 {
6054 return (dsl_dataset_user_release(holds, errlist));
6055 }
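
/*
 * Illustrative sketch (not compiled), assuming the libzfs_core wrapper
 * lzc_release() of the same vintage; names are hypothetical.  The innvl
 * nests one nvlist per snapshot listing the tags to drop.
 *
 *	nvlist_t *holds = fnvlist_alloc();
 *	nvlist_t *tags = fnvlist_alloc();
 *	nvlist_t *errlist = NULL;
 *	int error;
 *
 *	fnvlist_add_boolean(tags, "backup-job");
 *	fnvlist_add_nvlist(holds, "pool/fs@snap", tags);
 *	error = lzc_release(holds, &errlist);
 *	fnvlist_free(tags);
 *	fnvlist_free(holds);
 */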
6056
6057 /*
6058 * inputs:
6059 * zc_name name of new filesystem or snapshot
6060 * zc_value full name of old snapshot
6061 *
6062 * outputs:
6063 * zc_cookie space in bytes
6064 * zc_objset_type compressed space in bytes
6065 * zc_perm_action uncompressed space in bytes
6066 */
6067 static int
6068 zfs_ioc_space_written(zfs_cmd_t *zc)
6069 {
6070 int error;
6071 dsl_pool_t *dp;
6072 dsl_dataset_t *new, *old;
6073
6074 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
6075 if (error != 0)
6076 return (error);
6077 error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
6078 if (error != 0) {
6079 dsl_pool_rele(dp, FTAG);
6080 return (error);
6081 }
6082 error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
6083 if (error != 0) {
6084 dsl_dataset_rele(new, FTAG);
6085 dsl_pool_rele(dp, FTAG);
6086 return (error);
6087 }
6088
6089 error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
6090 &zc->zc_objset_type, &zc->zc_perm_action);
6091 dsl_dataset_rele(old, FTAG);
6092 dsl_dataset_rele(new, FTAG);
6093 dsl_pool_rele(dp, FTAG);
6094 return (error);
6095 }
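
/*
 * Illustrative sketch (not compiled, names hypothetical): the newer
 * filesystem or snapshot goes in zc_name, the older snapshot in zc_value,
 * and the handler above fills in the three output fields listed in its
 * comment with the used, compressed, and uncompressed byte counts.
 *
 *	zfs_cmd_t zc = { 0 };
 *
 *	(void) strlcpy(zc.zc_name, "pool/fs@today", sizeof (zc.zc_name));
 *	(void) strlcpy(zc.zc_value, "pool/fs@yesterday",
 *	    sizeof (zc.zc_value));
 */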
6096
6097 /*
6098 * innvl: {
6099 * "firstsnap" -> snapshot name
6100 * }
6101 *
6102 * outnvl: {
6103 * "used" -> space in bytes
6104 * "compressed" -> compressed space in bytes
6105 * "uncompressed" -> uncompressed space in bytes
6106 * }
6107 */
6108 static int
6109 zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
6110 {
6111 int error;
6112 dsl_pool_t *dp;
6113 dsl_dataset_t *new, *old;
6114 char *firstsnap;
6115 uint64_t used, comp, uncomp;
6116
6117 if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0)
6118 return (SET_ERROR(EINVAL));
6119
6120 error = dsl_pool_hold(lastsnap, FTAG, &dp);
6121 if (error != 0)
6122 return (error);
6123
6124 error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
6125 if (error == 0 && !new->ds_is_snapshot) {
6126 dsl_dataset_rele(new, FTAG);
6127 error = SET_ERROR(EINVAL);
6128 }
6129 if (error != 0) {
6130 dsl_pool_rele(dp, FTAG);
6131 return (error);
6132 }
6133 error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
6134 if (error == 0 && !old->ds_is_snapshot) {
6135 dsl_dataset_rele(old, FTAG);
6136 error = SET_ERROR(EINVAL);
6137 }
6138 if (error != 0) {
6139 dsl_dataset_rele(new, FTAG);
6140 dsl_pool_rele(dp, FTAG);
6141 return (error);
6142 }
6143
6144 error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
6145 dsl_dataset_rele(old, FTAG);
6146 dsl_dataset_rele(new, FTAG);
6147 dsl_pool_rele(dp, FTAG);
6148 fnvlist_add_uint64(outnvl, "used", used);
6149 fnvlist_add_uint64(outnvl, "compressed", comp);
6150 fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
6151 return (error);
6152 }
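
/*
 * Illustrative sketch (not compiled, names hypothetical): the ioctl is
 * issued against the newest snapshot of the range and innvl's "firstsnap"
 * names the oldest; outnvl then reports how much space destroying every
 * snapshot in that inclusive range would reclaim.
 *
 *	nvlist_t *innvl = fnvlist_alloc();
 *	uint64_t used;
 *
 *	fnvlist_add_string(innvl, "firstsnap", "pool/fs@monday");
 *	... issue against "pool/fs@friday"; then on success
 *	used = fnvlist_lookup_uint64(outnvl, "used");
 *	fnvlist_free(innvl);
 */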
6153
6154 static int
6155 zfs_ioc_vdev_set_props(zfs_cmd_t *zc)
6156 {
6157 nvlist_t *props;
6158 spa_t *spa;
6159 int error;
6160 uint64_t vdev_guid = zc->zc_guid;
6161
6162 if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
6163 zc->zc_iflags, &props))
6164 return (error);
6165
6166 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
6167 nvlist_free(props);
6168 return (error);
6169 }
6170
6171 error = spa_vdev_prop_set(spa, vdev_guid, props);
6172
6173 nvlist_free(props);
6174 spa_close(spa, FTAG);
6175
6176 return (error);
6177 }
6178
6179 static int
6180 zfs_ioc_vdev_get_props(zfs_cmd_t *zc)
6181 {
6182 spa_t *spa;
6183 uint64_t vdev_guid = zc->zc_guid;
6184 nvlist_t *nvp = NULL;
6185 int error;
6186
6187 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
6188 /*
6189 * If the pool is faulted, there may be properties we can still
6190 * get (such as altroot and cachefile), so attempt to get them
6191 * anyway.
6192 */
6193 mutex_enter(&spa_namespace_lock);
6194 if ((spa = spa_lookup(zc->zc_name)) != NULL)
6195 error = spa_vdev_prop_get(spa, vdev_guid, &nvp);
6196 mutex_exit(&spa_namespace_lock);
6197 } else {
6198 error = spa_vdev_prop_get(spa, vdev_guid, &nvp);
6199 spa_close(spa, FTAG);
6200 }
6201
6202 if (error == 0 && zc->zc_nvlist_dst != NULL)
6203 error = put_nvlist(zc, nvp);
6204 else
6205 error = EFAULT;
6206
6207 nvlist_free(nvp);
6208 return (error);
6209 }
6210
6211 static int
6212 zfs_ioc_cos_alloc(zfs_cmd_t *zc)
6213 {
6214 nvlist_t *props;
6215 spa_t *spa;
6216 int error;
6217
6218 if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
6219 zc->zc_iflags, &props))
6220 return (error);
6221
6222 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
6223 nvlist_free(props);
6224 return (error);
6225 }
6226
6227 error = spa_alloc_cos(spa, zc->zc_string, 0);
6228 if (!error)
6229 error = spa_cos_prop_set(spa, zc->zc_string, props);
6230
6231 spa_close(spa, FTAG);
6232 nvlist_free(props);
6233
6234 return (error);
6235 }
6236
6237 static int
6238 zfs_ioc_cos_free(zfs_cmd_t *zc)
6239 {
6240 spa_t *spa;
6241 int error = 0;
6242
6243 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
6244 return (error);
6245
6246 error = spa_free_cos(spa, zc->zc_string, zc->zc_cookie);
6247
6248 spa_close(spa, FTAG);
6249
6250 return (error);
6251 }
6252
6253 static int
6254 zfs_ioc_cos_list(zfs_cmd_t *zc)
6255 {
6256 spa_t *spa;
6257 nvlist_t *nvl;
6258 int error = 0;
6259
6260 VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
6261
6262 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
6263 nvlist_free(nvl);
6264 return (error);
6265 }
6266
6267 error = spa_list_cos(spa, nvl);
6268
6269 if (error == 0 && zc->zc_nvlist_dst != NULL)
6270 error = put_nvlist(zc, nvl);
6271
6272 spa_close(spa, FTAG);
6273 nvlist_free(nvl);
6274
6275 return (error);
6276 }
6277
6278 static int
6279 zfs_ioc_cos_set_props(zfs_cmd_t *zc)
6280 {
6281 nvlist_t *props;
6282 spa_t *spa;
6283 cos_t *cos;
6284 const char *cosname = NULL;
6285 int error = 0;
6286
6287 if ((zc->zc_string == NULL || zc->zc_string[0] == '\0') &&
6288 zc->zc_guid == 0)
6289 return (SET_ERROR(EINVAL));
6290
	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
	    zc->zc_iflags, &props)) != 0)
6293 return (error);
6294
6295 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
6296 nvlist_free(props);
6297 return (error);
6298 }
6299
6300 if (zc->zc_guid == 0) {
6301 cosname = zc->zc_string;
6302 } else {
6303 spa_cos_enter(spa);
6304 cos = spa_lookup_cos_by_guid(spa, zc->zc_guid);
6305 if (cos != NULL)
6306 cosname = cos->cos_name;
6307 else
6308 error = SET_ERROR(ENOENT);
6309 spa_cos_exit(spa);
6310 }
6311
6312 if (error == 0)
6313 error = spa_cos_prop_set(spa, cosname, props);
6314
6315 spa_close(spa, FTAG);
6316 nvlist_free(props);
6317
6318 return (error);
6319 }
6320
6321 static int
6322 zfs_ioc_cos_get_props(zfs_cmd_t *zc)
6323 {
6324 spa_t *spa;
6325 cos_t *cos;
6326 nvlist_t *nvp = NULL;
6327 const char *cosname = NULL;
6328 int error = 0;
6329
6330 if ((zc->zc_string == NULL || zc->zc_string[0] == '\0') &&
6331 zc->zc_guid == 0)
6332 return (SET_ERROR(EINVAL));
6333
6334 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
6335 return (error);
6336
6337 if (zc->zc_guid == 0) {
6338 cosname = zc->zc_string;
6339 } else {
6340 spa_cos_enter(spa);
6341 cos = spa_lookup_cos_by_guid(spa, zc->zc_guid);
6342 if (cos != NULL)
6343 cosname = cos->cos_name;
6344 spa_cos_exit(spa);
6345 }
6346
6347 if (error == 0)
6348 error = spa_cos_prop_get(spa, cosname, &nvp);
6349
6350 spa_close(spa, FTAG);
6351
6352 if (error == 0 && zc->zc_nvlist_dst != NULL)
6353 error = put_nvlist(zc, nvp);
6354 else
6355 error = EFAULT;
6356
6357 nvlist_free(nvp);
6358 return (error);
6359 }
6360
6361 /*
6362 * innvl: {
6363 * "fd" -> file descriptor to write stream to (int32)
6364 * (optional) "fromsnap" -> full snap name to send an incremental from
6365 * (optional) "largeblockok" -> (value ignored)
 *         presence indicates that blocks > 128KB are permitted
6367 * (optional) "embedok" -> (value ignored)
6368 * presence indicates DRR_WRITE_EMBEDDED records are permitted
6369 * (optional) "compressok" -> (value ignored)
6370 * presence indicates compressed DRR_WRITE records are permitted
6371 * (optional) "resume_object" and "resume_offset" -> (uint64)
6372 * if present, resume send stream from specified object and offset.
6373 * }
6374 *
6375 * outnvl is unused
6376 */
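/*
 * Illustrative sketch of the innvl a caller might construct for this ioctl,
 * using the same fnvlist helpers used elsewhere in this file.  The snapshot
 * name and file descriptor are hypothetical, and this is not the
 * authoritative userland interface:
 *
 *	nvlist_t *innvl = fnvlist_alloc();
 *	fnvlist_add_int32(innvl, "fd", fd);
 *	fnvlist_add_string(innvl, "fromsnap", "pool/fs@earlier");
 *	fnvlist_add_boolean(innvl, "compressok");
 *	fnvlist_add_uint64(innvl, "resume_object", resumeobj);
 *	fnvlist_add_uint64(innvl, "resume_offset", resumeoff);
 *
 * For the flag entries only presence matters; the nvlist is released with
 * fnvlist_free() once the ioctl returns.
 */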
6377 /* ARGSUSED */
6378 static int
6379 zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
6380 {
6381 int error;
6382 offset_t off;
6383 char *fromname = NULL;
6384 int fd;
6385 boolean_t largeblockok;
6386 boolean_t embedok;
6387 boolean_t compressok;
6388 uint64_t resumeobj = 0;
6389 uint64_t resumeoff = 0;
6390
6391 error = nvlist_lookup_int32(innvl, "fd", &fd);
6392 if (error != 0)
6393 return (SET_ERROR(EINVAL));
6394
6395 (void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
6396
6397 largeblockok = nvlist_exists(innvl, "largeblockok");
6398 embedok = nvlist_exists(innvl, "embedok");
6399 compressok = nvlist_exists(innvl, "compressok");
6400
6401 (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
6402 (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
6403
6404 file_t *fp = getf(fd);
6405 if (fp == NULL)
6406 return (SET_ERROR(EBADF));
6407
6408 off = fp->f_offset;
6409 error = dmu_send(snapname, fromname, embedok, largeblockok, compressok,
6410 fd, resumeobj, resumeoff, fp->f_vnode, &off);
6411
6412 if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
6413 fp->f_offset = off;
6414 releasef(fd);
6415 return (error);
6416 }
6417
6418 /*
6419 * Determine approximately how large a zfs send stream will be -- the number
6420 * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
6421 *
6422 * innvl: {
6423 * (optional) "from" -> full snap or bookmark name to send an incremental
6424 * from
6425 * (optional) "largeblockok" -> (value ignored)
 *         presence indicates that blocks > 128KB are permitted
6427 * (optional) "embedok" -> (value ignored)
6428 * presence indicates DRR_WRITE_EMBEDDED records are permitted
6429 * (optional) "compressok" -> (value ignored)
6430 * presence indicates compressed DRR_WRITE records are permitted
6431 * }
6432 *
6433 * outnvl: {
6434 * "space" -> bytes of space (uint64)
6435 * }
6436 */
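/*
 * Illustrative sketch (hypothetical names): a caller estimating a compressed
 * incremental stream from a bookmark might pass
 *
 *	nvlist_t *innvl = fnvlist_alloc();
 *	fnvlist_add_string(innvl, "from", "pool/fs#mark");
 *	fnvlist_add_boolean(innvl, "compressok");
 *
 * and, on success, read the estimate back with
 * fnvlist_lookup_uint64(outnvl, "space").
 */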
6437 static int
6438 zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
6439 {
6440 dsl_pool_t *dp;
6441 dsl_dataset_t *tosnap;
6442 int error;
6443 char *fromname;
6444 boolean_t compressok;
6445 uint64_t space;
6446
6447 error = dsl_pool_hold(snapname, FTAG, &dp);
6448 if (error != 0)
6449 return (error);
6450
6451 error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
6452 if (error != 0) {
6453 dsl_pool_rele(dp, FTAG);
6454 return (error);
6455 }
6456
6457 compressok = nvlist_exists(innvl, "compressok");
6458
6459 error = nvlist_lookup_string(innvl, "from", &fromname);
6460 if (error == 0) {
6461 if (strchr(fromname, '@') != NULL) {
6462 /*
6463 * If from is a snapshot, hold it and use the more
6464 * efficient dmu_send_estimate to estimate send space
6465 * size using deadlists.
6466 */
6467 dsl_dataset_t *fromsnap;
6468 error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
6469 if (error != 0)
6470 goto out;
6471 error = dmu_send_estimate(tosnap, fromsnap, compressok,
6472 &space);
6473 dsl_dataset_rele(fromsnap, FTAG);
6474 } else if (strchr(fromname, '#') != NULL) {
6475 /*
6476 * If from is a bookmark, fetch the creation TXG of the
6477 * snapshot it was created from and use that to find
6478 * blocks that were born after it.
6479 */
6480 zfs_bookmark_phys_t frombm;
6481
6482 error = dsl_bookmark_lookup(dp, fromname, tosnap,
6483 &frombm);
6484 if (error != 0)
6485 goto out;
6486 error = dmu_send_estimate_from_txg(tosnap,
6487 frombm.zbm_creation_txg, compressok, &space);
6488 } else {
6489 /*
6490 * from is not properly formatted as a snapshot or
6491 * bookmark
6492 */
6493 error = SET_ERROR(EINVAL);
6494 goto out;
6495 }
6496 } else {
6497 /*
6498 * If estimating the size of a full send, use dmu_send_estimate.
6499 */
6500 error = dmu_send_estimate(tosnap, NULL, compressok, &space);
6501 }
6502
6503 fnvlist_add_uint64(outnvl, "space", space);
6504
6505 out:
6506 dsl_dataset_rele(tosnap, FTAG);
6507 dsl_pool_rele(dp, FTAG);
6508 return (error);
6509 }
6510
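/*
 * Argument block shared between zfs_ioc_list_from_cursor() and
 * dmu_objset_find_dp_cursor() below: "offset" carries the serialized ZAP
 * cursor position between calls, "skip" and "count" bound the walk, "verbose"
 * requests space accounting for each dataset, and "snaps" selects snapshots
 * rather than child filesystems and volumes.
 */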
6511 typedef struct dp_cursor_cb_arg {
6512 nvlist_t *outnvl;
6513 uint64_t offset;
6514 uint32_t count;
6515 uint32_t skip;
6516 boolean_t verbose;
6517 boolean_t snaps;
6518 } dp_cursor_cb_arg_t;
6519
6520 /* ARGSUSED */
6521 int
6522 ds_cursor_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
6523 {
6524 int error;
6525 char dsname[MAXNAMELEN];
6526 objset_t *osp;
6527
6528 dp_cursor_cb_arg_t *cb = (dp_cursor_cb_arg_t *)arg;
6529
6530 dsl_dataset_name(ds, dsname);
6531 nvlist_t *nv = fnvlist_alloc();
6532
6533 fnvlist_add_uint64(nv, zfs_prop_to_name(ZFS_PROP_GUID),
6534 dsl_dataset_phys(ds)->ds_guid);
6535
6536 if (cb->verbose) {
6537 uint64_t refd, avail, uobjs, aobjs;
6538 dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
6539
6540 fnvlist_add_uint64(nv, zfs_prop_to_name(ZFS_PROP_AVAILABLE),
6541 avail);
6542 fnvlist_add_uint64(nv, zfs_prop_to_name(ZFS_PROP_REFERENCED),
6543 refd);
6544 fnvlist_add_uint64(nv, zfs_prop_to_name(ZFS_PROP_USED),
6545 dsl_dir_phys(ds->ds_dir)->dd_used_bytes);
6546 }
6547
6548 error = dmu_objset_from_ds(ds, &osp);
6549
6550 if (error == 0)
6551 fnvlist_add_uint64(nv, zfs_prop_to_name(ZFS_PROP_TYPE),
6552 dmu_objset_type(osp));
6553
6554 fnvlist_add_nvlist(cb->outnvl, dsname, nv);
6555 nvlist_free(nv);
6556 return (error);
6557 }
6558
6559 int
6560 dmu_objset_find_dp_cursor(dsl_pool_t *dp, uint64_t ddobj,
6561 int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg)
6562 {
6563 dsl_dir_t *dd;
6564 dsl_dataset_t *ds;
6565 zap_cursor_t zc;
6566 zap_attribute_t *attr;
6567 uint64_t thisobj;
6568 int error, i;
6569
6570 dp_cursor_cb_arg_t *cb = (dp_cursor_cb_arg_t *)arg;
6571
6572 ASSERT(dsl_pool_config_held(dp));
	error = dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd);
	if (error != 0)
		return (error);

	thisobj = dsl_dir_phys(dd)->dd_head_dataset_obj;
6578
6579 attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
6580
	/* we are interested in filesystems and volumes */
6582 if (!cb->snaps) {
6583
6584 /* init the cursor at given offset */
6585 zap_cursor_init_serialized(&zc, dp->dp_meta_objset,
6586 dsl_dir_phys(dd)->dd_child_dir_zapobj, cb->offset);
6587
6588
6589 for (i = 0; i < cb->skip; i++) {
6590 zap_cursor_advance(&zc);
6591 if ((zap_cursor_retrieve(&zc, attr) != 0)) {
6592 error = ENOENT;
6593 goto out;
6594 }
6595 }
6596
6597 for (i = 0; i < cb->count; i++) {
6598 zap_cursor_advance(&zc);
6599 if ((zap_cursor_retrieve(&zc, attr) != 0)) {
6600 error = ENOENT;
6601 goto out;
6602 }
6603
6604 ASSERT3U(attr->za_integer_length, ==,
6605 sizeof (uint64_t));
6606 ASSERT3U(attr->za_num_integers, ==, 1);
			/* recursively walk objects, skipping $MOS and $ORIGIN */
6608 error = dmu_objset_find_dp(dp, attr->za_first_integer,
6609 func, arg, 0);
6610 if (error != 0)
6611 break;
6612 }
6613 } else {
6614
6615 dsl_dataset_t *ds;
6616
6617 error = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
6618
		if (error == 0) {
			zap_cursor_init_serialized(&zc, dp->dp_meta_objset,
			    dsl_dataset_phys(ds)->ds_snapnames_zapobj,
			    cb->offset);
			dsl_dataset_rele(ds, FTAG);
6625
6626 for (i = 0; i < cb->skip; i++) {
6627 zap_cursor_advance(&zc);
6628 if ((zap_cursor_retrieve(&zc,
6629 attr) != 0)) {
6630 error = ENOENT;
6631 goto out;
6632 }
6633 }
6634
6635 for (i = 0; i < cb->count; i++) {
6636 zap_cursor_advance(&zc);
6637 if ((zap_cursor_retrieve(&zc, attr) != 0)) {
6638 error = ENOENT;
6639 goto out;
6640
6641 }
6642
6643 ASSERT3U(attr->za_integer_length, ==,
6644 sizeof (uint64_t));
6645 ASSERT3U(attr->za_num_integers, ==, 1);
6646
6647 error = dsl_dataset_hold_obj(dp,
6648 attr->za_first_integer, FTAG, &ds);
6649 if (error != 0)
6650 break;
6651 error = func(dp, ds, arg);
6652 dsl_dataset_rele(ds, FTAG);
6653 if (error != 0)
6654 break;
6655 }
6656 }
6657 }
6658 out:
6659 cb->offset = zap_cursor_serialize(&zc);
6660 zap_cursor_fini(&zc);
6661 dsl_dir_rele(dd, FTAG);
6662 kmem_free(attr, sizeof (zap_attribute_t));
6663
6664 /* return self as the last dataset */
6665 if (error == ENOENT) {
6666 if ((error = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds)) != 0)
6667 return (error);
6668 error = func(dp, ds, arg);
6669 dsl_dataset_rele(ds, FTAG);
6670 if (error)
6671 return (error);
6672 error = ENOENT;
6673 }
6674
6675 return (error);
6676 }
6677
6678
6679 /*
 * We want to list all datasets under the given name. Optionally, we advance
 * the ZAP cursor "skip" times and retrieve "count" datasets. We return the
 * offset so the caller can start the next invocation where it left off.
6683 */
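/*
 * Illustrative sketch (hypothetical values) of the innvl for this ioctl:
 *
 *	nvlist_t *innvl = fnvlist_alloc();
 *	fnvlist_add_uint32(innvl, "count", 10);
 *	fnvlist_add_uint32(innvl, "skip", 0);
 *	fnvlist_add_uint64(innvl, "offset", 0);
 *	fnvlist_add_boolean_value(innvl, "verbose", B_TRUE);
 *	fnvlist_add_boolean_value(innvl, "snaps", B_FALSE);
 *
 * The "offset" returned in outnvl is fed back in on the next invocation.
 */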
6684
6685 static int
6686 zfs_ioc_list_from_cursor(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
6687 {
6688
6689 dsl_pool_t *dp;
6690 dsl_dataset_t *ds;
6691
6692 int error;
6693
	/* zero-fill so that absent innvl entries default to 0 / B_FALSE */
	dp_cursor_cb_arg_t cb_args = { 0 };
6695
	if (strchr(name, '@') != NULL)
		return (SET_ERROR(EINVAL));
6698
6699 if ((error = dsl_pool_hold(name, FTAG, &dp)) != 0)
6700 return (error);
6701
6702 if ((error = dsl_dataset_hold(dp, name, FTAG, &ds)) != 0) {
6703 dsl_pool_rele(dp, FTAG);
6704 return (error);
6705 }
6706
6707 (void) nvlist_lookup_uint32(innvl, "count", &cb_args.count);
6708 (void) nvlist_lookup_uint32(innvl, "skip", &cb_args.skip);
6709 (void) nvlist_lookup_uint64(innvl, "offset", &cb_args.offset);
6710 (void) nvlist_lookup_boolean_value(innvl, "verbose", &cb_args.verbose);
6711 (void) nvlist_lookup_boolean_value(innvl, "snaps", &cb_args.snaps);
6712
6713 cb_args.outnvl = outnvl;
6714 error = dmu_objset_find_dp_cursor(dp, ds->ds_dir->dd_object,
6715 &ds_cursor_cb, &cb_args);
6716
6717 fnvlist_add_uint64(outnvl, "offset", cb_args.offset);
6718 dsl_dataset_rele(ds, FTAG);
6719 dsl_pool_rele(dp, FTAG);
6720
6721 return (error);
6722 }
6723
6724
6725 static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
6726
6727 static void
6728 zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6729 zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
6730 boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
6731 {
6732 zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
6733
6734 ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
6735 ASSERT3U(ioc, <, ZFS_IOC_LAST);
6736 ASSERT3P(vec->zvec_legacy_func, ==, NULL);
6737 ASSERT3P(vec->zvec_func, ==, NULL);
6738
6739 vec->zvec_legacy_func = func;
6740 vec->zvec_secpolicy = secpolicy;
6741 vec->zvec_namecheck = namecheck;
6742 vec->zvec_allow_log = log_history;
6743 vec->zvec_pool_check = pool_check;
6744 }
6745
6746 /*
6747 * See the block comment at the beginning of this file for details on
6748 * each argument to this function.
6749 */
6750 static void
6751 zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
6752 zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
6753 zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
6754 boolean_t allow_log)
6755 {
6756 zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
6757
6758 ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
6759 ASSERT3U(ioc, <, ZFS_IOC_LAST);
6760 ASSERT3P(vec->zvec_legacy_func, ==, NULL);
6761 ASSERT3P(vec->zvec_func, ==, NULL);
6762
6763 /* if we are logging, the name must be valid */
6764 ASSERT(!allow_log || namecheck != NO_NAME);
6765
6766 vec->zvec_name = name;
6767 vec->zvec_func = func;
6768 vec->zvec_secpolicy = secpolicy;
6769 vec->zvec_namecheck = namecheck;
6770 vec->zvec_pool_check = pool_check;
6771 vec->zvec_smush_outnvlist = smush_outnvlist;
6772 vec->zvec_allow_log = allow_log;
6773 }
6774
6775 static void
6776 zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6777 zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
6778 zfs_ioc_poolcheck_t pool_check)
6779 {
6780 zfs_ioctl_register_legacy(ioc, func, secpolicy,
6781 POOL_NAME, log_history, pool_check);
6782 }
6783
6784 static void
6785 zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6786 zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
6787 {
6788 zfs_ioctl_register_legacy(ioc, func, secpolicy,
6789 DATASET_NAME, B_FALSE, pool_check);
6790 }
6791
6792 static void
6793 zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
6794 {
6795 zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
6796 POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6797 }
6798
6799 static void
6800 zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6801 zfs_secpolicy_func_t *secpolicy)
6802 {
6803 zfs_ioctl_register_legacy(ioc, func, secpolicy,
6804 NO_NAME, B_FALSE, POOL_CHECK_NONE);
6805 }
6806
6807 static void
6808 zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
6809 zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
6810 {
6811 zfs_ioctl_register_legacy(ioc, func, secpolicy,
6812 DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
6813 }
6814
6815 static void
6816 zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
6817 {
6818 zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
6819 zfs_secpolicy_read);
6820 }
6821
6822 static void
6823 zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
6824 zfs_secpolicy_func_t *secpolicy)
6825 {
6826 zfs_ioctl_register_legacy(ioc, func, secpolicy,
6827 DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6828 }
6829
6830 /*
6831 * Appearing to take poolname as a parameter is a concession to the ioctl
 * handler. The leading underbar on the "_generation" nvpair exists only on
 * output, to avoid a conflict with a pool name.
6834 */
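/*
 * Illustrative sketch (hypothetical caller state): pass the last generation
 * seen and, when something changed, read the new value back out:
 *
 *	nvlist_t *innvl = fnvlist_alloc();
 *	fnvlist_add_uint64(innvl, "generation", last_generation);
 *
 * On success the pool configs are merged into outnvl and the new generation
 * is available via fnvlist_lookup_uint64(outnvl, "_generation"); EEXIST
 * indicates that spa_all_configs() had nothing newer to report.
 */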
6835 /* ARGSUSED */
6836 static int
6837 zfs_ioc_pool_configs_nvl(const char *poolname, nvlist_t *innvl,
6838 nvlist_t *outnvl)
6839 {
6840 nvlist_t *configs;
6841 uint64_t generation;
6842
6843 if (nvlist_lookup_uint64(innvl, "generation", &generation) != 0)
6844 return (SET_ERROR(EINVAL));
6845
6846 if ((configs = spa_all_configs(&generation)) == NULL)
6847 return (SET_ERROR(EEXIST));
6848
6849 fnvlist_merge(outnvl, configs);
6850 nvlist_free(configs);
6851 fnvlist_add_uint64(outnvl, "_generation", generation);
6852
6853 return (0);
6854 }
6855
6856 /*
 * Ask the spa for pool statistics. If we get a non-NULL config but a non-zero
 * return from the spa, we return EAGAIN to hint to callers that the config we
 * retrieved belongs to a faulted pool. We take no input arguments but declare
 * otherwise to suit the ioctl handler's pattern; likewise, outnvl is a single
 * pointer into which we merge the config allocated (or left NULL) by the spa.
6862 */
6863 static int
6864 zfs_ioc_pool_stats_nvl(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
6865 {
6866 nvlist_t *config;
6867 int error;
6868 int ret = 0;
6869
6870 ASSERT3P(innvl, ==, NULL);
6871 error = spa_get_stats(poolname, &config, NULL, 0);
6872 ASSERT3U(error, !=, EAGAIN);
6873
6874 if (config != NULL) {
6875 fnvlist_merge(outnvl, config);
6876 nvlist_free(config);
6877 if (error)
6878 ret = SET_ERROR(EAGAIN);
6879 } else {
6880 ret = error;
6881 }
6882
6883 return (ret);
6884 }
6885
6886 static nvlist_t *
6887 objset_stats2nv(dmu_objset_stats_t *stat)
6888 {
6889 nvlist_t *statlist = fnvlist_alloc();
6890
6891 fnvlist_add_uint64(statlist, "dds_num_clones", stat->dds_num_clones);
6892 fnvlist_add_uint64(statlist, "dds_creation_txg",
6893 stat->dds_creation_txg);
6894 fnvlist_add_uint64(statlist, "dds_guid", stat->dds_guid);
6895 fnvlist_add_uint8(statlist, "dds_type", (uint8_t)stat->dds_type);
6896 fnvlist_add_uint8(statlist, "dds_is_snapshot", stat->dds_is_snapshot);
6897 fnvlist_add_uint8(statlist, "dds_inconsistent",
6898 stat->dds_inconsistent);
6899 fnvlist_add_string(statlist, "dds_origin", stat->dds_origin);
6900
6901 return (statlist);
6902 }
6903
6904 /* Given an objset, retrieve stats and props by adding them to the output nvl */
6905 static int
6906 objset_render(objset_t *os, nvlist_t *outnvl)
6907 {
6908 int error = 0;
6909 nvlist_t *props = NULL, *statlist = NULL;
6910 dmu_objset_stats_t stats;
6911
6912 dmu_objset_fast_stat(os, &stats);
6913
6914 if ((error = dsl_prop_get_all(os, &props)) == 0) {
6915 dmu_objset_stats(os, props);
6916 /*
6917 * NB: zvol_get_stats() will read the objset contents,
6918 * which we aren't supposed to do with a
6919 * DS_MODE_USER hold, because it could be
6920 * inconsistent. So this is a bit of a workaround...
		 * XXX reading without owning
6922 */
6923 if (!stats.dds_inconsistent &&
6924 dmu_objset_type(os) == DMU_OST_ZVOL) {
6925 error = zvol_get_stats(os, props);
6926 if (error == EIO)
6927 goto out;
6928 VERIFY0(error);
6929 }
6930 fnvlist_add_nvlist(outnvl, "props", props);
6931 statlist = objset_stats2nv(&stats);
6932 fnvlist_add_nvlist(outnvl, "stats", statlist);
6933 nvlist_free(statlist);
6934 }
6935
6936 out:
6937 nvlist_free(props);
6938 return (error);
6939 }
6940
6941 /*
6942 * Note: this IOC can be called internally by other IOCs as an existence
6943 * check against race conditions. Given a dataset name, return its stats
6944 * and props. Optionally we can verify type, which simplifies things for
6945 * callers that may not want to parse stats for themselves (and may discard
6946 * the outnvl in handlers).
6947 */
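/*
 * Illustrative sketch (hypothetical usage): a caller that only wants to
 * confirm that a name exists and refers to a zvol can pass
 *
 *	nvlist_t *innvl = fnvlist_alloc();
 *	fnvlist_add_uint8(innvl, "type", DMU_OST_ZVOL);
 *
 * and discard the "props"/"stats" nvlists in outnvl; EEXIST indicates the
 * dataset exists but is of a different type.
 */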
6948 static int
6949 zfs_ioc_objset_stats_nvl(const char *data, nvlist_t *innvl, nvlist_t *outnvl)
6950 {
6951 objset_t *os;
6952 int error;
6953 dmu_objset_type_t checktype = DMU_OST_ANY;
6954 boolean_t gettype = B_FALSE;
6955
6956 if (innvl != NULL) {
6957 if (nvlist_lookup_uint8(innvl, "type", (uint8_t *)&checktype)
6958 == 0)
6959 gettype = B_TRUE;
6960 }
6961 if ((error = dmu_objset_hold(data, FTAG, &os)) == 0) {
6962 error = objset_render(os, outnvl);
6963 dmu_objset_rele(os, FTAG);
6964
6965 if (error == 0) {
6966 nvlist_t *statlist;
6967 dmu_objset_type_t type;
6968 statlist = fnvlist_lookup_nvlist(outnvl, "stats");
6969 type = fnvlist_lookup_uint8_t(statlist, "dds_type");
6970 if (checktype != DMU_OST_ANY && type != checktype) {
6971 error = EEXIST;
6972 fnvlist_remove(outnvl, "stats");
6973 fnvlist_remove(outnvl, "props");
6974 }
6975 if (gettype)
6976 fnvlist_add_uint8(outnvl, "type", type);
6977 }
6978 }
6979
6980 return (error);
6981 }
6982
6983 /*
6984 * Given a dataset name and an innvl containing a DMU cursor offset, find the
 * next child dataset, and return its name, stats, and props, along with an
 * updated cursor.
6987 */
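/*
 * Illustrative sketch of the iteration pattern (hypothetical caller code):
 * start at offset 0 and feed the returned offset back in until the ioctl
 * fails with ESRCH:
 *
 *	uint64_t off = 0;
 *	nvlist_t *innvl = fnvlist_alloc();
 *	fnvlist_add_uint64(innvl, "offset", off);
 *
 * Each successful call names the child in "nextds" and returns the updated
 * "offset" in outnvl.
 */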
6988 static int
6989 zfs_ioc_dataset_list_next_nvl(const char *data, nvlist_t *innvl,
6990 nvlist_t *outnvl)
6991 {
6992 objset_t *os;
6993 int error;
6994 uint64_t off;
	char *p;
6996 char name[MAXNAMELEN];
6997 size_t len;
6998 size_t orig_len = strlen(data);
6999
7000 if (innvl == NULL ||
7001 nvlist_lookup_uint64(innvl, "offset", &off) != 0)
7002 return (SET_ERROR(EINVAL));
7003
7004 (void) strlcpy(name, data, sizeof (name));
7005 top:
	if ((error = dmu_objset_hold(name, FTAG, &os)) != 0) {
7007 if (error == ENOENT)
7008 error = SET_ERROR(ESRCH);
7009 return (error);
7010 }
7011
7012 p = strrchr(name, '/');
7013 if (p == NULL || p[1] != '\0') {
7014 if ((len = strlcat(name, "/", sizeof (name))) >= MAXNAMELEN) {
7015 dmu_objset_rele(os, FTAG);
7016 return (SET_ERROR(ESRCH));
7017 }
7018 } else {
7019 len = orig_len;
7020 }
7021 p = name + len;
7022
7023 do {
7024 error = dmu_dir_list_next(os, sizeof (name) - len, p, NULL,
7025 &off);
7026 if (error == ENOENT)
7027 error = ESRCH;
7028 } while (error == 0 && dataset_name_hidden(name));
7029 dmu_objset_rele(os, FTAG);
7030
7031 /*
	 * If it's an internal dataset (i.e. with a '$' in its name),
7033 * don't try to get stats for it, otherwise we'll return ENOENT.
7034 */
7035 if (error == 0 && strchr(name, '$') == NULL) {
7036 error = zfs_ioc_objset_stats_nvl(name, NULL, outnvl);
7037 if (error == ENOENT) {
7038 /* We lost a race with destroy, get the next one. */
7039 name[orig_len] = '\0';
7040 goto top;
7041 }
		fnvlist_add_string(outnvl, "nextds", name);
		fnvlist_add_uint64(outnvl, "offset", off);
7047 }
7048
7049 return (error);
7050 }
7051
7052 /*
7053 * Given a dataset name and a DMU cursor offset, find its next snapshot, and
 * return its name, props, and stats, along with an updated cursor offset.
7055 */
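/*
 * Illustrative sketch: iteration works the same way as for the dataset cursor
 * above -- pass "offset" in innvl, read "nextsnap" and the updated "offset"
 * from outnvl, and stop when the ioctl fails with ESRCH.
 */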
7056 static int
7057 zfs_ioc_snapshot_list_next_nvl(const char *data, nvlist_t *innvl,
7058 nvlist_t *outnvl)
7059 {
7060 objset_t *os;
7061 int error;
7062 uint64_t off, obj;
	char name[MAXNAMELEN];
7064 size_t len;
7065
7066 if (innvl == NULL ||
7067 nvlist_lookup_uint64(innvl, "offset", &off) != 0)
7068 return (SET_ERROR(EINVAL));
7069
7070 error = dmu_objset_hold(data, FTAG, &os);
7071 if (error != 0) {
7072 return (error == ENOENT ? ESRCH : error);
7073 }
7074
7075 /*
7076 * A dataset name of maximum length cannot have any snapshots,
7077 * so exit immediately.
7078 */
7079 (void) strlcpy(name, data, sizeof (name));
7080 if ((len = strlcat(name, "@", sizeof (name))) >= MAXNAMELEN) {
7081 dmu_objset_rele(os, FTAG);
7082 return (SET_ERROR(ESRCH));
7083 }
7084
7085 /* Rest of name buffer is passed so snap ID can be appended. */
7086 error = dmu_snapshot_list_next(os, sizeof (name) - len, name + len,
7087 &obj, &off, NULL);
7088
7089 if (error == 0) {
7090 dsl_dataset_t *ds;
7091 dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
7092
7093 error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds);
7094 if (error == 0) {
7095 objset_t *ossnap;
7096
7097 error = dmu_objset_from_ds(ds, &ossnap);
7098 if (error == 0)
7099 error = objset_render(ossnap, outnvl);
7100 dsl_dataset_rele(ds, FTAG);
7101 }
7102 } else if (error == ENOENT) {
7103 error = ESRCH;
7104 }
7105
7106 dmu_objset_rele(os, FTAG);
7107
7108 if (error == 0) {
		fnvlist_add_string(outnvl, "nextsnap", name);
7113 fnvlist_add_uint64(outnvl, "offset", off);
7114 }
7115 return (error);
7116 }
7117
7118 static int
7119 zfs_ioc_pool_get_props_nvl(const char *poolname, nvlist_t *innvl,
7120 nvlist_t *outnvl)
7121 {
7122 spa_t *spa;
7123 int error;
7124 nvlist_t *props = NULL;
7125
7126 ASSERT3P(innvl, ==, NULL);
7127 if ((error = spa_open(poolname, &spa, FTAG)) != 0) {
7128 /*
7129 * If the pool is faulted, there may be properties we can still
7130 * get (such as altroot and cachefile), so attempt to get them
7131 * anyway.
7132 */
7133 mutex_enter(&spa_namespace_lock);
7134 if ((spa = spa_lookup(poolname)) != NULL)
7135 error = spa_prop_get(spa, &props);
7136 mutex_exit(&spa_namespace_lock);
7137 } else {
7138 error = spa_prop_get(spa, &props);
7139 spa_close(spa, FTAG);
7140 }
7141
7142 if (props != NULL) {
7143 fnvlist_merge(outnvl, props);
7144 nvlist_free(props);
7145 } else {
7146 ASSERT3S(error, !=, 0);
7147 }
7148
7149 return (error);
7150 }
7151
7152 /* ARGSUSED */
7153 static int
7154 zfs_ioc_check_krrp(const char *dataset, nvlist_t *innvl, nvlist_t *outnvl)
7155 {
7156 spa_t *spa;
7157 int err;
7158
7159 /*
	 * Here we use a different way to open the spa for the given pool,
	 * because the pool may be faulted.
7162 */
7163
7164 mutex_enter(&spa_namespace_lock);
7165 if ((spa = spa_lookup(dataset)) == NULL) {
7166 mutex_exit(&spa_namespace_lock);
		/* From the KRRP side everything is fine */
7168 return (0);
7169 }
7170
7171 spa_open_ref(spa, FTAG);
7172 mutex_exit(&spa_namespace_lock);
7173
7174 err = autosnap_check_for_destroy(spa_get_autosnap(spa), dataset);
7175 if (err == 0)
7176 err = ENOTSUP;
7177
7178 mutex_enter(&spa_namespace_lock);
7179 spa_close(spa, FTAG);
7180 mutex_exit(&spa_namespace_lock);
7181
7182 return (err != 0 ? SET_ERROR(err) : 0);
7183 }
7184
7185 static void
7186 zfs_ioctl_init(void)
7187 {
7188 zfs_ioctl_register("bulk_list", ZFS_IOC_BULK_LIST,
7189 zfs_ioc_list_from_cursor, zfs_secpolicy_read,
7190 DATASET_NAME, POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
7191
7192 zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
7193 zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
7194 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
7195
7196 zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
7197 zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
7198 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);
7199
7200 zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
7201 zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
7202 POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
7203
7204 zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
7205 zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
7206 POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
7207
7208 zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
7209 zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
7210 POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
7211
7212 zfs_ioctl_register("create", ZFS_IOC_CREATE,
7213 zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
7214 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
7215
7216 zfs_ioctl_register("clone", ZFS_IOC_CLONE,
7217 zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
7218 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
7219
7220 zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
7221 zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
7222 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
7223
7224 zfs_ioctl_register("check_krrp", ZFS_IOC_CHECK_KRRP,
7225 zfs_ioc_check_krrp, zfs_secpolicy_read, DATASET_NAME,
7226 POOL_CHECK_NONE, B_FALSE, B_FALSE);
7227
7228 zfs_ioctl_register("pool_stats_nvl", ZFS_IOC_POOL_STATS_NVL,
7229 zfs_ioc_pool_stats_nvl, zfs_secpolicy_read, POOL_NAME,
7230 POOL_CHECK_NONE, B_FALSE, B_FALSE);
7231
7232 zfs_ioctl_register("pool_configs_nvl", ZFS_IOC_POOL_CONFIGS_NVL,
7233 zfs_ioc_pool_configs_nvl, zfs_secpolicy_none, NO_NAME,
7234 POOL_CHECK_NONE, B_FALSE, B_FALSE);
7235
7236 zfs_ioctl_register("pool_get_props_nvl", ZFS_IOC_POOL_GET_PROPS_NVL,
7237 zfs_ioc_pool_get_props_nvl, zfs_secpolicy_read, POOL_NAME,
7238 POOL_CHECK_NONE, B_FALSE, B_FALSE);
7239
7240 zfs_ioctl_register("objset_stats_nvl", ZFS_IOC_OBJSET_STATS_NVL,
7241 zfs_ioc_objset_stats_nvl, zfs_secpolicy_read, DATASET_NAME,
7242 POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
7243
7244 zfs_ioctl_register("dataset_list_next_nvl",
7245 ZFS_IOC_DATASET_LIST_NEXT_NVL, zfs_ioc_dataset_list_next_nvl,
7246 zfs_secpolicy_read, DATASET_NAME, POOL_CHECK_SUSPENDED, B_FALSE,
7247 B_FALSE);
7248
7249 zfs_ioctl_register("snapshot_list_next_nvl",
7250 ZFS_IOC_SNAPSHOT_LIST_NEXT_NVL, zfs_ioc_snapshot_list_next_nvl,
7251 zfs_secpolicy_read, DATASET_NAME, POOL_CHECK_SUSPENDED, B_FALSE,
7252 B_FALSE);
7253
7254 zfs_ioctl_register("hold", ZFS_IOC_HOLD,
7255 zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
7256 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
7257 zfs_ioctl_register("release", ZFS_IOC_RELEASE,
7258 zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
7259 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
7260
7261 zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
7262 zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
7263 POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
7264
7265 zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
7266 zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
7267 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);
7268
7269 zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
7270 zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
7271 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
7272
7273 zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
7274 zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
7275 POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
7276
7277 zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
7278 zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
7279 POOL_NAME,
7280 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
7281
7282 zfs_ioctl_register("channel_program", ZFS_IOC_CHANNEL_PROGRAM,
7283 zfs_ioc_channel_program, zfs_secpolicy_config,
7284 POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE,
7285 B_TRUE);
7286
7287 zfs_ioctl_register("set_props_mds", ZFS_IOC_SET_PROPS_MDS,
7288 zfs_ioc_set_prop_mds, zfs_secpolicy_config,
7289 POOL_NAME,
7290 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
7291
7292 /* IOCTLS that use the legacy function signature */
7293
7294 zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
7295 zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);
7296
7297 zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
7298 zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
7299 zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
7300 zfs_ioc_pool_scan);
7301 zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_TRIM,
7302 zfs_ioc_pool_trim);
7303 zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
7304 zfs_ioc_pool_upgrade);
7305 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
7306 zfs_ioc_vdev_add);
7307 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
7308 zfs_ioc_vdev_remove);
7309 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
7310 zfs_ioc_vdev_set_state);
7311 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
7312 zfs_ioc_vdev_attach);
7313 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
7314 zfs_ioc_vdev_detach);
7315 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETL2ADDDT,
7316 zfs_ioc_vdev_setl2adddt);
7317 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
7318 zfs_ioc_vdev_setpath);
7319 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
7320 zfs_ioc_vdev_setfru);
7321 zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
7322 zfs_ioc_pool_set_props);
7323 zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
7324 zfs_ioc_vdev_split);
7325 zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
7326 zfs_ioc_pool_reguid);
7327
7328 zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
7329 zfs_ioc_pool_configs, zfs_secpolicy_none);
7330 zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
7331 zfs_ioc_pool_tryimport, zfs_secpolicy_config);
7332 zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
7333 zfs_ioc_inject_fault, zfs_secpolicy_inject);
7334 zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
7335 zfs_ioc_clear_fault, zfs_secpolicy_inject);
7336 zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
7337 zfs_ioc_inject_list_next, zfs_secpolicy_inject);
7338
7339 /*
	 * Pool destroy and export don't log the history as part of
	 * zfsdev_ioctl; rather, zfs_ioc_pool_export does the logging of
	 * those commands.
7343 */
7344 zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
7345 zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
7346 zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
7347 zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
7348
7349 zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
7350 zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
7351 zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
7352 zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
7353
7354 zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
7355 zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
7356 zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
7357 zfs_ioc_dsobj_to_dsname,
7358 zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED);
7359 zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
7360 zfs_ioc_pool_get_history,
7361 zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
7362
7363 zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
7364 zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
7365
7366 zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
7367 zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
7368 zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
7369 zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
7370 zfs_ioctl_register_pool(ZFS_IOC_VDEV_SET_PROPS, zfs_ioc_vdev_set_props,
7371 zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
7372 zfs_ioctl_register_pool(ZFS_IOC_VDEV_GET_PROPS, zfs_ioc_vdev_get_props,
7373 zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
7374 zfs_ioctl_register_pool(ZFS_IOC_COS_ALLOC, zfs_ioc_cos_alloc,
7375 zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
7376 zfs_ioctl_register_pool(ZFS_IOC_COS_FREE, zfs_ioc_cos_free,
7377 zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
7378 zfs_ioctl_register_pool(ZFS_IOC_COS_LIST, zfs_ioc_cos_list,
7379 zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
7380 zfs_ioctl_register_pool(ZFS_IOC_COS_SET_PROPS, zfs_ioc_cos_set_props,
7381 zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
7382 zfs_ioctl_register_pool(ZFS_IOC_COS_GET_PROPS, zfs_ioc_cos_get_props,
7383 zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
7384 zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
7385 zfs_ioc_space_written);
7386 zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
7387 zfs_ioc_objset_recvd_props);
7388 zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
7389 zfs_ioc_next_obj);
7390 zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
7391 zfs_ioc_get_fsacl);
7392 zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
7393 zfs_ioc_objset_stats);
7394 zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
7395 zfs_ioc_objset_zplprops);
7396 zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
7397 zfs_ioc_dataset_list_next);
7398 zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
7399 zfs_ioc_snapshot_list_next);
7400 zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
7401 zfs_ioc_send_progress);
7402
7403 zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
7404 zfs_ioc_diff, zfs_secpolicy_diff);
7405 zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
7406 zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
7407 zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
7408 zfs_ioc_obj_to_path, zfs_secpolicy_diff);
7409 zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
7410 zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
7411 zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
7412 zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
7413 zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
7414 zfs_ioc_send, zfs_secpolicy_send);
7415
7416 zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
7417 zfs_secpolicy_none);
7418 zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
7419 zfs_secpolicy_destroy);
7420 zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
7421 zfs_secpolicy_rename);
7422 zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
7423 zfs_secpolicy_recv);
7424 zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
7425 zfs_secpolicy_promote);
7426 zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
7427 zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
7428 zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
7429 zfs_secpolicy_set_fsacl);
7430
7431 zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
7432 zfs_secpolicy_share, POOL_CHECK_NONE);
7433 zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
7434 zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
7435 zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
7436 zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
7437 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
7438 zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
7439 zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
7440 POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
7441 }
7442
7443 int
7444 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
7445 zfs_ioc_poolcheck_t check)
7446 {
7447 spa_t *spa;
7448 int error;
7449
7450 ASSERT(type == POOL_NAME || type == DATASET_NAME);
7451
7452 if (check & POOL_CHECK_NONE)
7453 return (0);
7454
7455 error = spa_open(name, &spa, FTAG);
7456 if (error == 0) {
7457 if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
7458 error = SET_ERROR(EAGAIN);
7459 else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
7460 error = SET_ERROR(EROFS);
7461 spa_close(spa, FTAG);
7462 }
7463 return (error);
7464 }
7465
7466 /*
7467 * Find a free minor number.
7468 */
7469 minor_t
7470 zfsdev_minor_alloc(void)
7471 {
7472 static minor_t last_minor;
7473 minor_t m;
7474
7475 ASSERT(MUTEX_HELD(&zfsdev_state_lock));
7476
7477 for (m = last_minor + 1; m != last_minor; m++) {
7478 if (m > ZFSDEV_MAX_MINOR)
7479 m = 1;
7480 if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
7481 last_minor = m;
7482 return (m);
7483 }
7484 }
7485
7486 return (0);
7487 }
7488
7489 static int
7490 zfs_ctldev_init(dev_t *devp)
7491 {
7492 minor_t minor;
7493 zfs_soft_state_t *zs;
7494
7495 ASSERT(MUTEX_HELD(&zfsdev_state_lock));
7496 ASSERT(getminor(*devp) == 0);
7497
7498 minor = zfsdev_minor_alloc();
7499 if (minor == 0)
7500 return (SET_ERROR(ENXIO));
7501
7502 if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
7503 return (SET_ERROR(EAGAIN));
7504
7505 *devp = makedevice(getemajor(*devp), minor);
7506
7507 zs = ddi_get_soft_state(zfsdev_state, minor);
7508 zs->zss_type = ZSST_CTLDEV;
7509 zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
7510
7511 return (0);
7512 }
7513
7514 static void
7515 zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
7516 {
7517 ASSERT(MUTEX_HELD(&zfsdev_state_lock));
7518
7519 zfs_onexit_destroy(zo);
7520 ddi_soft_state_free(zfsdev_state, minor);
7521 }
7522
7523 void *
7524 zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
7525 {
7526 zfs_soft_state_t *zp;
7527
7528 zp = ddi_get_soft_state(zfsdev_state, minor);
7529 if (zp == NULL || zp->zss_type != which)
7530 return (NULL);
7531
7532 return (zp->zss_data);
7533 }
7534
7535 static int
7536 zfsdev_open(dev_t *devp, int flag, int otyp, cred_t *cr)
7537 {
7538 int error = 0;
7539
7540 if (getminor(*devp) != 0)
7541 return (zvol_open(devp, flag, otyp, cr));
7542
7543 /* This is the control device. Allocate a new minor if requested. */
7544 if (flag & FEXCL) {
7545 mutex_enter(&zfsdev_state_lock);
7546 error = zfs_ctldev_init(devp);
7547 mutex_exit(&zfsdev_state_lock);
7548 }
7549
7550 return (error);
7551 }
7552
7553 static int
7554 zfsdev_close(dev_t dev, int flag, int otyp, cred_t *cr)
7555 {
7556 zfs_onexit_t *zo;
7557 minor_t minor = getminor(dev);
7558
7559 if (minor == 0)
7560 return (0);
7561
7562 mutex_enter(&zfsdev_state_lock);
7563 zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
7564 if (zo == NULL) {
7565 mutex_exit(&zfsdev_state_lock);
7566 return (zvol_close(dev, flag, otyp, cr));
7567 }
7568 zfs_ctldev_destroy(zo, minor);
7569 mutex_exit(&zfsdev_state_lock);
7570
7571 return (0);
7572 }
7573
7574 static int
7575 zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
7576 {
7577 zfs_cmd_t *zc;
7578 uint_t vecnum;
7579 int error, rc, len;
7580 minor_t minor = getminor(dev);
7581 const zfs_ioc_vec_t *vec;
7582 char *saved_poolname = NULL;
7583 nvlist_t *innvl = NULL;
7584
7585 if (minor != 0 &&
7586 zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL)
7587 return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
7588
7589 vecnum = cmd - ZFS_IOC_FIRST;
7590 ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
7591
7592 if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
7593 return (SET_ERROR(EINVAL));
7594 vec = &zfs_ioc_vec[vecnum];
7595
7596 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
7597
7598 error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
7599 if (error != 0) {
7600 error = SET_ERROR(EFAULT);
7601 goto out;
7602 }
7603
7604 zc->zc_iflags = flag & FKIOCTL;
7605 if (zc->zc_nvlist_src_size != 0) {
7606 error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
7607 zc->zc_iflags, &innvl);
7608 if (error != 0)
7609 goto out;
7610 }
7611
7612 /*
7613 * Ensure that all pool/dataset names are valid before we pass down to
7614 * the lower layers.
7615 */
7616 zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
7617 switch (vec->zvec_namecheck) {
7618 case POOL_NAME:
7619 if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
7620 error = SET_ERROR(EINVAL);
7621 else
7622 error = pool_status_check(zc->zc_name,
7623 vec->zvec_namecheck, vec->zvec_pool_check);
7624 break;
7625
7626 case DATASET_NAME:
7627 if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
7628 error = SET_ERROR(EINVAL);
7629 else
7630 error = pool_status_check(zc->zc_name,
7631 vec->zvec_namecheck, vec->zvec_pool_check);
7632 break;
7633
7634 case NO_NAME:
7635 break;
7636 }
7637
7638
7639 if (error == 0)
7640 error = vec->zvec_secpolicy(zc, innvl, cr);
7641
7642 if (error != 0)
7643 goto out;
7644
7645 /* legacy ioctls can modify zc_name */
7646 len = strcspn(zc->zc_name, "/@#") + 1;
7647 saved_poolname = kmem_alloc(len, KM_SLEEP);
7648 (void) strlcpy(saved_poolname, zc->zc_name, len);
7649
7650 if (vec->zvec_func != NULL) {
7651 nvlist_t *outnvl;
7652 int puterror = 0;
7653 spa_t *spa;
7654 nvlist_t *lognv = NULL;
7655
7656 ASSERT(vec->zvec_legacy_func == NULL);
7657
7658 /*
7659 * Add the innvl to the lognv before calling the func,
7660 * in case the func changes the innvl.
7661 */
7662 if (vec->zvec_allow_log) {
7663 lognv = fnvlist_alloc();
7664 fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
7665 vec->zvec_name);
7666 if (!nvlist_empty(innvl)) {
7667 fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
7668 innvl);
7669 }
7670 }
7671
7672 outnvl = fnvlist_alloc();
7673 error = vec->zvec_func(zc->zc_name, innvl, outnvl);
7674
7675 /*
		 * Some commands can partially execute, modify state, and still
7677 * return an error. In these cases, attempt to record what
7678 * was modified.
7679 */
7680 if ((error == 0 ||
7681 (cmd == ZFS_IOC_CHANNEL_PROGRAM && error != EINVAL)) &&
7682 vec->zvec_allow_log &&
7683 spa_open(zc->zc_name, &spa, FTAG) == 0) {
7684 if (!nvlist_empty(outnvl)) {
7685 fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
7686 outnvl);
7687 }
7688 if (error != 0) {
7689 fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO,
7690 error);
7691 }
7692 (void) spa_history_log_nvl(spa, lognv);
7693 spa_close(spa, FTAG);
7694 }
7695 fnvlist_free(lognv);
7696
7697 if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
7698 int smusherror = 0;
7699 if (vec->zvec_smush_outnvlist) {
7700 smusherror = nvlist_smush(outnvl,
7701 zc->zc_nvlist_dst_size);
7702 }
7703 if (smusherror == 0)
7704 puterror = put_nvlist(zc, outnvl);
7705 }
7706
7707 if (puterror != 0)
7708 error = puterror;
7709
7710 nvlist_free(outnvl);
7711 } else {
7712 error = vec->zvec_legacy_func(zc);
7713 }
7714
7715 out:
7716 nvlist_free(innvl);
7717 rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
7718 if (error == 0 && rc != 0)
7719 error = SET_ERROR(EFAULT);
7720 if (error == 0 && vec->zvec_allow_log) {
7721 char *s = tsd_get(zfs_allow_log_key);
7722 if (s != NULL)
7723 strfree(s);
7724 (void) tsd_set(zfs_allow_log_key, saved_poolname);
7725 } else {
7726 if (saved_poolname != NULL)
7727 strfree(saved_poolname);
7728 }
7729
7730 kmem_free(zc, sizeof (zfs_cmd_t));
7731 return (error);
7732 }
7733
7734 static int
7735 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
7736 {
7737 if (cmd != DDI_ATTACH)
7738 return (DDI_FAILURE);
7739
7740 if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
7741 DDI_PSEUDO, 0) == DDI_FAILURE)
7742 return (DDI_FAILURE);
7743
7744 zfs_dip = dip;
7745
7746 ddi_report_dev(dip);
7747
7748 return (DDI_SUCCESS);
7749 }
7750
7751 static int
7752 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
7753 {
7754 if (spa_busy() || zfs_busy() || zvol_busy())
7755 return (DDI_FAILURE);
7756
7757 if (cmd != DDI_DETACH)
7758 return (DDI_FAILURE);
7759
7760 zfs_dip = NULL;
7761
7762 ddi_prop_remove_all(dip);
7763 ddi_remove_minor_node(dip, NULL);
7764
7765 return (DDI_SUCCESS);
7766 }
7767
7768 /*ARGSUSED*/
7769 static int
7770 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
7771 {
7772 switch (infocmd) {
7773 case DDI_INFO_DEVT2DEVINFO:
7774 *result = zfs_dip;
7775 return (DDI_SUCCESS);
7776
7777 case DDI_INFO_DEVT2INSTANCE:
7778 *result = (void *)0;
7779 return (DDI_SUCCESS);
7780 }
7781
7782 return (DDI_FAILURE);
7783 }
7784
7785 /*
7786 * OK, so this is a little weird.
7787 *
7788 * /dev/zfs is the control node, i.e. minor 0.
7789 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
7790 *
7791 * /dev/zfs has basically nothing to do except serve up ioctls,
7792 * so most of the standard driver entry points are in zvol.c.
7793 */
7794 static struct cb_ops zfs_cb_ops = {
7795 zfsdev_open, /* open */
7796 zfsdev_close, /* close */
7797 zvol_strategy, /* strategy */
7798 nodev, /* print */
7799 zvol_dump, /* dump */
7800 zvol_read, /* read */
7801 zvol_write, /* write */
7802 zfsdev_ioctl, /* ioctl */
7803 nodev, /* devmap */
7804 nodev, /* mmap */
7805 nodev, /* segmap */
7806 nochpoll, /* poll */
7807 ddi_prop_op, /* prop_op */
7808 NULL, /* streamtab */
7809 D_NEW | D_MP | D_64BIT, /* Driver compatibility flag */
7810 CB_REV, /* version */
7811 nodev, /* async read */
7812 nodev, /* async write */
7813 };
7814
7815 static struct dev_ops zfs_dev_ops = {
7816 DEVO_REV, /* version */
7817 0, /* refcnt */
7818 zfs_info, /* info */
7819 nulldev, /* identify */
7820 nulldev, /* probe */
7821 zfs_attach, /* attach */
7822 zfs_detach, /* detach */
7823 nodev, /* reset */
7824 &zfs_cb_ops, /* driver operations */
7825 NULL, /* no bus operations */
7826 NULL, /* power */
7827 ddi_quiesce_not_needed, /* quiesce */
7828 };
7829
7830 static struct modldrv zfs_modldrv = {
7831 &mod_driverops,
7832 "ZFS storage pool",
7833 &zfs_dev_ops
7834 };
7835
7836 static struct modlinkage modlinkage = {
7837 MODREV_1,
7838 (void *)&zfs_modlfs,
7839 (void *)&zfs_modldrv,
7840 NULL
7841 };
7842
7843 static void
7844 zfs_allow_log_destroy(void *arg)
7845 {
7846 char *poolname = arg;
7847 strfree(poolname);
7848 }
7849
7850 int
7851 _init(void)
7852 {
7853 int error;
7854
7855 spa_init(FREAD | FWRITE);
7856 zfs_init();
7857 zvol_init();
7858 zfs_ioctl_init();
7859
7860 if ((error = mod_install(&modlinkage)) != 0) {
7861 zvol_fini();
7862 zfs_fini();
7863 spa_fini();
7864 return (error);
7865 }
7866
7867 tsd_create(&zfs_fsyncer_key, NULL);
7868 tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
7869 tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
7870
7871 error = ldi_ident_from_mod(&modlinkage, &zfs_li);
7872 ASSERT(error == 0);
7873 mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
7874
7875 if (sysevent_evc_bind(ZFS_EVENT_CHANNEL, &zfs_channel,
7876 EVCH_HOLD_PEND | EVCH_CREAT) != 0)
7877 cmn_err(CE_NOTE, "Failed to bind to zfs event channel");
7878
7879 return (0);
7880 }
7881
7882 int
7883 _fini(void)
7884 {
7885 int error;
7886
7887 if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
7888 return (SET_ERROR(EBUSY));
7889
7890 if ((error = mod_remove(&modlinkage)) != 0)
7891 return (error);
7892
7893 zvol_fini();
7894 zfs_fini();
7895 spa_fini();
7896 if (zfs_nfsshare_inited)
7897 (void) ddi_modclose(nfs_mod);
7898 if (zfs_smbshare_inited)
7899 (void) ddi_modclose(smbsrv_mod);
7900 if (zfs_nfsshare_inited || zfs_smbshare_inited)
7901 (void) ddi_modclose(sharefs_mod);
7902
7903 tsd_destroy(&zfs_fsyncer_key);
7904 ldi_ident_release(zfs_li);
7905 zfs_li = NULL;
7906 mutex_destroy(&zfs_share_lock);
7907
7908 if (zfs_channel)
7909 (void) sysevent_evc_unbind(zfs_channel);
7910
7911 return (error);
7912 }
7913
7914 int
7915 _info(struct modinfo *modinfop)
7916 {
7917 return (mod_info(&modlinkage, modinfop));
7918 }