1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
25 * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
26 * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>.
27 */
28
29 /*
30 * Functions to convert between a list of vdevs and an nvlist representing the
31 * configuration. Each entry in the list can be one of:
32 *
33 * Device vdevs
34 * disk=(path=..., devid=...)
35 * file=(path=...)
36 *
37 * Group vdevs
38 * raidz[1|2]=(...)
39 * mirror=(...)
40 *
41 * Hot spares
42 *
43 * While the underlying implementation supports it, group vdevs cannot contain
44 * other group vdevs. All userland verification of devices is contained within
45 * this file. If successful, the nvlist returned can be passed directly to the
46 * kernel; we've done as much verification as possible in userland.
47 *
48 * Hot spares are a special case, and passed down as an array of disk vdevs, at
49 * the same level as the root of the vdev tree.
50 *
51 * The only function exported by this file is 'make_root_vdev'. The
52 * function performs several passes:
53 *
54 * 1. Construct the vdev specification. Performs syntax validation and
55 * makes sure each device is valid.
56 * 2. Check for devices in use. Using libdiskmgt, makes sure that no
57 * devices are also in use. Some can be overridden using the 'force'
58 * flag, others cannot.
59 * 3. Check for replication errors if the 'force' flag is not specified.
60 * validates that the replication level is consistent across the
61 * entire pool.
62 * 4. Call libzfs to label any whole disks with an EFI label.
63 */
64
65 #include <assert.h>
66 #include <devid.h>
67 #include <errno.h>
68 #include <fcntl.h>
69 #include <libdiskmgt.h>
70 #include <libintl.h>
71 #include <libnvpair.h>
72 #include <limits.h>
73 #include <stdio.h>
74 #include <string.h>
75 #include <unistd.h>
76 #include <sys/efi_partition.h>
77 #include <sys/stat.h>
78 #include <sys/vtoc.h>
79 #include <sys/mntent.h>
80
81 #include "zpool_util.h"
82
83 #define BACKUP_SLICE "s2"
84
85 /*
86 * For any given vdev specification, we can have multiple errors. The
87 * vdev_error() function keeps track of whether we have seen an error yet, and
88 * prints out a header if its the first error we've seen.
89 */
boolean_t error_seen;	/* set once vdev_error() has printed its header */
boolean_t is_force;	/* selects header wording; presumably mirrors the
			 * user's '-f' flag -- confirm at the caller that
			 * sets it */
92
93 /*PRINTFLIKE1*/
94 static void
95 vdev_error(const char *fmt, ...)
96 {
97 va_list ap;
98
99 if (!error_seen) {
100 (void) fprintf(stderr, gettext("invalid vdev specification\n"));
101 if (!is_force)
102 (void) fprintf(stderr, gettext("use '-f' to override "
103 "the following errors:\n"));
104 else
105 (void) fprintf(stderr, gettext("the following errors "
106 "must be manually repaired:\n"));
107 error_seen = B_TRUE;
108 }
109
110 va_start(ap, fmt);
111 (void) vfprintf(stderr, fmt, ap);
112 va_end(ap);
113 }
114
static void
libdiskmgt_error(int error)
{
	/*
	 * A device that doesn't live in /dev/dsk legitimately produces
	 * ENXIO or ENODEV, so stay quiet for those; anything else is
	 * worth a warning.
	 */
	if (error != ENXIO && error != ENODEV)
		(void) fprintf(stderr, gettext("warning: device in use "
		    "checking failed: %s\n"), strerror(error));
}
128
129 /*
130 * Validate a device, passing the bulk of the work off to libdiskmgt.
131 */
132 static int
133 check_slice(const char *path, int force, boolean_t wholedisk, boolean_t isspare)
134 {
135 char *msg;
136 int error = 0;
137 dm_who_type_t who;
138
139 if (force)
140 who = DM_WHO_ZPOOL_FORCE;
141 else if (isspare)
142 who = DM_WHO_ZPOOL_SPARE;
143 else
144 who = DM_WHO_ZPOOL;
145
146 if (dm_inuse((char *)path, &msg, who, &error) || error) {
147 if (error != 0) {
148 libdiskmgt_error(error);
149 return (0);
150 } else {
151 vdev_error("%s", msg);
152 free(msg);
153 return (-1);
154 }
155 }
156
157 /*
158 * If we're given a whole disk, ignore overlapping slices since we're
159 * about to label it anyway.
160 */
161 error = 0;
162 if (!wholedisk && !force &&
163 (dm_isoverlapping((char *)path, &msg, &error) || error)) {
164 if (error == 0) {
165 /* dm_isoverlapping returned -1 */
166 vdev_error(gettext("%s overlaps with %s\n"), path, msg);
167 free(msg);
168 return (-1);
169 } else if (error != ENODEV) {
170 /* libdiskmgt's devcache only handles physical drives */
171 libdiskmgt_error(error);
172 return (0);
173 }
174 }
175
176 return (0);
177 }
178
179
180 /*
181 * Validate a whole disk. Iterate over all slices on the disk and make sure
182 * that none is in use by calling check_slice().
183 */
184 static int
185 check_disk(const char *name, dm_descriptor_t disk, int force, int isspare)
186 {
187 dm_descriptor_t *drive, *media, *slice;
188 int err = 0;
189 int i;
190 int ret;
191
192 /*
193 * Get the drive associated with this disk. This should never fail,
194 * because we already have an alias handle open for the device.
195 */
196 if ((drive = dm_get_associated_descriptors(disk, DM_DRIVE,
197 &err)) == NULL || *drive == NULL) {
198 if (err)
199 libdiskmgt_error(err);
200 return (0);
201 }
202
203 if ((media = dm_get_associated_descriptors(*drive, DM_MEDIA,
204 &err)) == NULL) {
205 dm_free_descriptors(drive);
206 if (err)
207 libdiskmgt_error(err);
208 return (0);
209 }
210
211 dm_free_descriptors(drive);
212
213 /*
214 * It is possible that the user has specified a removable media drive,
215 * and the media is not present.
216 */
217 if (*media == NULL) {
218 dm_free_descriptors(media);
219 vdev_error(gettext("'%s' has no media in drive\n"), name);
220 return (-1);
221 }
222
223 if ((slice = dm_get_associated_descriptors(*media, DM_SLICE,
224 &err)) == NULL) {
225 dm_free_descriptors(media);
226 if (err)
227 libdiskmgt_error(err);
228 return (0);
229 }
230
231 dm_free_descriptors(media);
232
233 ret = 0;
234
235 /*
236 * Iterate over all slices and report any errors. We don't care about
237 * overlapping slices because we are using the whole disk.
238 */
239 for (i = 0; slice[i] != NULL; i++) {
240 char *name = dm_get_name(slice[i], &err);
241
242 if (check_slice(name, force, B_TRUE, isspare) != 0)
243 ret = -1;
244
245 dm_free_name(name);
246 }
247
248 dm_free_descriptors(slice);
249 return (ret);
250 }
251
252 /*
253 * Validate a device.
254 */
255 static int
256 check_device(const char *path, boolean_t force, boolean_t isspare)
257 {
258 dm_descriptor_t desc;
259 int err;
260 char *dev;
261
262 /*
263 * For whole disks, libdiskmgt does not include the leading dev path.
264 */
265 dev = strrchr(path, '/');
266 assert(dev != NULL);
267 dev++;
268 if ((desc = dm_get_descriptor_by_name(DM_ALIAS, dev, &err)) != NULL) {
269 err = check_disk(path, desc, force, isspare);
270 dm_free_descriptor(desc);
271 return (err);
272 }
273
274 return (check_slice(path, force, B_FALSE, isspare));
275 }
276
277 /*
278 * Check that a file is valid. All we can do in this case is check that it's
279 * not in use by another pool, and not in use by swap.
280 */
281 static int
282 check_file(const char *file, boolean_t force, boolean_t isspare)
283 {
284 char *name;
285 int fd;
286 int ret = 0;
287 int err;
288 pool_state_t state;
289 boolean_t inuse;
290
291 if (dm_inuse_swap(file, &err)) {
292 if (err)
293 libdiskmgt_error(err);
294 else
295 vdev_error(gettext("%s is currently used by swap. "
296 "Please see swap(1M).\n"), file);
297 return (-1);
298 }
299
300 if ((fd = open(file, O_RDONLY)) < 0)
301 return (0);
302
303 if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) == 0 && inuse) {
304 const char *desc;
305
306 switch (state) {
307 case POOL_STATE_ACTIVE:
308 desc = gettext("active");
309 break;
310
311 case POOL_STATE_EXPORTED:
312 desc = gettext("exported");
313 break;
314
315 case POOL_STATE_POTENTIALLY_ACTIVE:
316 desc = gettext("potentially active");
317 break;
318
319 default:
320 desc = gettext("unknown");
321 break;
322 }
323
324 /*
325 * Allow hot spares to be shared between pools.
326 */
327 if (state == POOL_STATE_SPARE && isspare)
328 return (0);
329
330 if (state == POOL_STATE_ACTIVE ||
331 state == POOL_STATE_SPARE || !force) {
332 switch (state) {
333 case POOL_STATE_SPARE:
334 vdev_error(gettext("%s is reserved as a hot "
335 "spare for pool %s\n"), file, name);
336 break;
337 default:
338 vdev_error(gettext("%s is part of %s pool "
339 "'%s'\n"), file, desc, name);
340 break;
341 }
342 ret = -1;
343 }
344
345 free(name);
346 }
347
348 (void) close(fd);
349 return (ret);
350 }
351
352
353 /*
354 * By "whole disk" we mean an entire physical disk (something we can
355 * label, toggle the write cache on, etc.) as opposed to the full
356 * capacity of a pseudo-device such as lofi or did. We act as if we
357 * are labeling the disk, which should be a pretty good test of whether
358 * it's a viable device or not. Returns B_TRUE if it is and B_FALSE if
359 * it isn't.
360 */
361 static boolean_t
362 is_whole_disk(const char *arg)
363 {
364 struct dk_gpt *label;
365 int fd;
366 char path[MAXPATHLEN];
367
368 (void) snprintf(path, sizeof (path), "%s%s%s",
369 ZFS_RDISK_ROOT, strrchr(arg, '/'), BACKUP_SLICE);
370 if ((fd = open(path, O_RDWR | O_NDELAY)) < 0)
371 return (B_FALSE);
372 if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) {
373 (void) close(fd);
374 return (B_FALSE);
375 }
376 efi_free(label);
377 (void) close(fd);
378 return (B_TRUE);
379 }
380
381 /*
382 * Create a leaf vdev. Determine if this is a file or a device. If it's a
383 * device, fill in the device id to make a complete nvlist. Valid forms for a
384 * leaf vdev are:
385 *
386 * /dev/dsk/xxx Complete disk path
387 * /xxx Full path to file
388 * xxx Shorthand for /dev/dsk/xxx
389 */
390 static nvlist_t *
391 make_leaf_vdev(const char *arg, uint64_t is_log, uint64_t is_special)
392 {
393 char path[MAXPATHLEN];
394 struct stat64 statbuf;
395 nvlist_t *vdev = NULL;
396 char *type = NULL;
397 boolean_t wholedisk = B_FALSE;
398
399 /*
400 * Determine what type of vdev this is, and put the full path into
401 * 'path'. We detect whether this is a device of file afterwards by
402 * checking the st_mode of the file.
403 */
404 if (arg[0] == '/') {
405 /*
406 * Complete device or file path. Exact type is determined by
407 * examining the file descriptor afterwards.
408 */
409 wholedisk = is_whole_disk(arg);
410 if (!wholedisk && (stat64(arg, &statbuf) != 0)) {
411 (void) fprintf(stderr,
412 gettext("cannot open '%s': %s\n"),
413 arg, strerror(errno));
414 return (NULL);
415 }
416
417 (void) strlcpy(path, arg, sizeof (path));
418 } else {
419 /*
420 * This may be a short path for a device, or it could be total
421 * gibberish. Check to see if it's a known device in
422 * /dev/dsk/. As part of this check, see if we've been given a
423 * an entire disk (minus the slice number).
424 */
425 (void) snprintf(path, sizeof (path), "%s/%s", ZFS_DISK_ROOT,
426 arg);
427 wholedisk = is_whole_disk(path);
428 if (!wholedisk && (stat64(path, &statbuf) != 0)) {
429 /*
430 * If we got ENOENT, then the user gave us
431 * gibberish, so try to direct them with a
432 * reasonable error message. Otherwise,
433 * regurgitate strerror() since it's the best we
434 * can do.
435 */
436 if (errno == ENOENT) {
437 (void) fprintf(stderr,
438 gettext("cannot open '%s': no such "
439 "device in %s\n"), arg, ZFS_DISK_ROOT);
440 (void) fprintf(stderr,
441 gettext("must be a full path or "
442 "shorthand device name\n"));
443 return (NULL);
444 } else {
445 (void) fprintf(stderr,
446 gettext("cannot open '%s': %s\n"),
447 path, strerror(errno));
448 return (NULL);
449 }
450 }
451 }
452
453 /*
454 * Determine whether this is a device or a file.
455 */
456 if (wholedisk || S_ISBLK(statbuf.st_mode)) {
457 type = VDEV_TYPE_DISK;
458 } else if (S_ISREG(statbuf.st_mode)) {
459 type = VDEV_TYPE_FILE;
460 } else {
461 (void) fprintf(stderr, gettext("cannot use '%s': must be a "
462 "block device or regular file\n"), path);
463 return (NULL);
464 }
465
466 /*
467 * Finally, we have the complete device or file, and we know that it is
468 * acceptable to use. Construct the nvlist to describe this vdev. All
469 * vdevs have a 'path' element, and devices also have a 'devid' element.
470 */
471 verify(nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) == 0);
472 verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0);
473 verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0);
474 verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_LOG, is_log) == 0);
475 verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_SPECIAL,
476 is_special) == 0);
477 if (strcmp(type, VDEV_TYPE_DISK) == 0)
478 verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
479 (uint64_t)wholedisk) == 0);
480
481 /*
482 * For a whole disk, defer getting its devid until after labeling it.
483 */
484 if (S_ISBLK(statbuf.st_mode) && !wholedisk) {
485 /*
486 * Get the devid for the device.
487 */
488 int fd;
489 ddi_devid_t devid;
490 char *minor = NULL, *devid_str = NULL;
491
492 if ((fd = open(path, O_RDONLY)) < 0) {
493 (void) fprintf(stderr, gettext("cannot open '%s': "
494 "%s\n"), path, strerror(errno));
495 nvlist_free(vdev);
496 return (NULL);
497 }
498
499 if (devid_get(fd, &devid) == 0) {
500 if (devid_get_minor_name(fd, &minor) == 0 &&
501 (devid_str = devid_str_encode(devid, minor)) !=
502 NULL) {
503 verify(nvlist_add_string(vdev,
504 ZPOOL_CONFIG_DEVID, devid_str) == 0);
505 }
506 if (devid_str != NULL)
507 devid_str_free(devid_str);
508 if (minor != NULL)
509 devid_str_free(minor);
510 devid_free(devid);
511 }
512
513 (void) close(fd);
514 }
515
516 return (vdev);
517 }
518
519 /*
520 * Go through and verify the replication level of the pool is consistent.
521 * Performs the following checks:
522 *
523 * For the new spec, verifies that devices in mirrors and raidz are the
524 * same size.
525 *
526 * If the current configuration already has inconsistent replication
527 * levels, ignore any other potential problems in the new spec.
528 *
529 * Otherwise, make sure that the current spec (if there is one) and the new
530 * spec have consistent replication levels.
531 */
typedef struct replication_level {
	char *zprl_type;	/* vdev type string (e.g. disk, file, mirror) */
	uint64_t zprl_children;	/* number of children in each toplevel vdev */
	uint64_t zprl_parity;	/* raidz parity count; 0 for non-raidz */
} replication_level_t;

/* Devices within a group may differ in size by up to this much (~16MB). */
#define	ZPOOL_FUZZ	(16 * 1024 * 1024)
539
540 /*
541 * Given a list of toplevel vdevs, return the current replication level. If
542 * the config is inconsistent, then NULL is returned. If 'fatal' is set, then
543 * an error message will be displayed for each self-inconsistent vdev.
544 */
static replication_level_t *
get_replication(nvlist_t *nvroot, boolean_t fatal)
{
	nvlist_t **top;
	uint_t t, toplevels;
	nvlist_t **child;
	uint_t c, children;
	nvlist_t *nv;
	char *type;
	replication_level_t lastrep = {0};
	replication_level_t rep;
	replication_level_t *ret;	/* freed and NULLed on inconsistency */
	boolean_t dontreport;

	ret = safe_malloc(sizeof (replication_level_t));

	verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
	    &top, &toplevels) == 0);

	for (t = 0; t < toplevels; t++) {
		uint64_t is_log = B_FALSE;

		nv = top[t];

		/*
		 * For separate logs we ignore the top level vdev replication
		 * constraints.
		 */
		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log);
		if (is_log)
			continue;

		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE,
		    &type) == 0);
		if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
		    &child, &children) != 0) {
			/*
			 * This is a 'file' or 'disk' vdev.
			 */
			rep.zprl_type = type;
			rep.zprl_children = 1;
			rep.zprl_parity = 0;
		} else {
			uint64_t vdev_size;

			/*
			 * This is a mirror or RAID-Z vdev.  Go through and make
			 * sure the contents are all the same (files vs. disks),
			 * keeping track of the number of elements in the
			 * process.
			 *
			 * We also check that the size of each vdev (if it can
			 * be determined) is the same.
			 */
			rep.zprl_type = type;
			rep.zprl_children = 0;

			if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
				verify(nvlist_lookup_uint64(nv,
				    ZPOOL_CONFIG_NPARITY,
				    &rep.zprl_parity) == 0);
				assert(rep.zprl_parity != 0);
			} else {
				rep.zprl_parity = 0;
			}

			/*
			 * The 'dontreport' variable indicates that we've
			 * already reported an error for this spec, so don't
			 * bother doing it again.
			 */
			type = NULL;	/* reused: type of previous child */
			dontreport = 0;
			vdev_size = -1ULL;	/* sentinel: no size seen yet */
			for (c = 0; c < children; c++) {
				nvlist_t *cnv = child[c];
				char *path;
				struct stat64 statbuf;
				uint64_t size = -1ULL;
				char *childtype;
				int fd, err;

				rep.zprl_children++;

				verify(nvlist_lookup_string(cnv,
				    ZPOOL_CONFIG_TYPE, &childtype) == 0);

				/*
				 * If this is a replacing or spare vdev, then
				 * get the real first child of the vdev.
				 */
				if (strcmp(childtype,
				    VDEV_TYPE_REPLACING) == 0 ||
				    strcmp(childtype, VDEV_TYPE_SPARE) == 0) {
					nvlist_t **rchild;
					uint_t rchildren;

					verify(nvlist_lookup_nvlist_array(cnv,
					    ZPOOL_CONFIG_CHILDREN, &rchild,
					    &rchildren) == 0);
					assert(rchildren == 2);
					cnv = rchild[0];

					verify(nvlist_lookup_string(cnv,
					    ZPOOL_CONFIG_TYPE,
					    &childtype) == 0);
				}

				verify(nvlist_lookup_string(cnv,
				    ZPOOL_CONFIG_PATH, &path) == 0);

				/*
				 * If we have a raidz/mirror that combines disks
				 * with files, report it as an error.
				 */
				if (!dontreport && type != NULL &&
				    strcmp(type, childtype) != 0) {
					/*
					 * In fatal mode we keep scanning (to
					 * report every problem) but return
					 * NULL at the end; otherwise bail out
					 * immediately.
					 */
					if (ret != NULL)
						free(ret);
					ret = NULL;
					if (fatal)
						vdev_error(gettext(
						    "mismatched replication "
						    "level: %s contains both "
						    "files and devices\n"),
						    rep.zprl_type);
					else
						return (NULL);
					dontreport = B_TRUE;
				}

				/*
				 * According to stat(2), the value of 'st_size'
				 * is undefined for block devices and character
				 * devices.  But there is no effective way to
				 * determine the real size in userland.
				 *
				 * Instead, we'll take advantage of an
				 * implementation detail of spec_size().  If the
				 * device is currently open, then we (should)
				 * return a valid size.
				 *
				 * If we still don't get a valid size (indicated
				 * by a size of 0 or MAXOFFSET_T), then ignore
				 * this device altogether.
				 */
				if ((fd = open(path, O_RDONLY)) >= 0) {
					err = fstat64(fd, &statbuf);
					(void) close(fd);
				} else {
					err = stat64(path, &statbuf);
				}

				if (err != 0 ||
				    statbuf.st_size == 0 ||
				    statbuf.st_size == MAXOFFSET_T)
					continue;

				size = statbuf.st_size;

				/*
				 * Also make sure that devices and
				 * slices have a consistent size.  If
				 * they differ by a significant amount
				 * (~16MB) then report an error.
				 */
				if (!dontreport &&
				    (vdev_size != -1ULL &&
				    (labs(size - vdev_size) >
				    ZPOOL_FUZZ))) {
					if (ret != NULL)
						free(ret);
					ret = NULL;
					if (fatal)
						vdev_error(gettext(
						    "%s contains devices of "
						    "different sizes\n"),
						    rep.zprl_type);
					else
						return (NULL);
					dontreport = B_TRUE;
				}

				type = childtype;
				vdev_size = size;
			}
		}

		/*
		 * At this point, we have the replication of the last toplevel
		 * vdev in 'rep'.  Compare it to 'lastrep' to see if its
		 * different.
		 */
		if (lastrep.zprl_type != NULL) {
			if (strcmp(lastrep.zprl_type, rep.zprl_type) != 0) {
				if (ret != NULL)
					free(ret);
				ret = NULL;
				if (fatal)
					vdev_error(gettext(
					    "mismatched replication level: "
					    "both %s and %s vdevs are "
					    "present\n"),
					    lastrep.zprl_type, rep.zprl_type);
				else
					return (NULL);
			} else if (lastrep.zprl_parity != rep.zprl_parity) {
				if (ret)
					free(ret);
				ret = NULL;
				if (fatal)
					vdev_error(gettext(
					    "mismatched replication level: "
					    "both %llu and %llu device parity "
					    "%s vdevs are present\n"),
					    lastrep.zprl_parity,
					    rep.zprl_parity,
					    rep.zprl_type);
				else
					return (NULL);
			} else if (lastrep.zprl_children != rep.zprl_children) {
				if (ret)
					free(ret);
				ret = NULL;
				if (fatal)
					vdev_error(gettext(
					    "mismatched replication level: "
					    "both %llu-way and %llu-way %s "
					    "vdevs are present\n"),
					    lastrep.zprl_children,
					    rep.zprl_children,
					    rep.zprl_type);
				else
					return (NULL);
			}
		}
		lastrep = rep;
	}

	/* NULL 'ret' here means an inconsistency was found (fatal mode). */
	if (ret != NULL)
		*ret = rep;

	return (ret);
}
789
790 /*
791 * Check the replication level of the vdev spec against the current pool. Calls
792 * get_replication() to make sure the new spec is self-consistent. If the pool
793 * has a consistent replication level, then we ignore any errors. Otherwise,
794 * report any difference between the two.
795 */
796 static int
797 check_replication(nvlist_t *config, nvlist_t *newroot)
798 {
799 nvlist_t **child;
800 uint_t children;
801 replication_level_t *current = NULL, *new;
802 int ret;
803
804 /*
805 * If we have a current pool configuration, check to see if it's
806 * self-consistent. If not, simply return success.
807 */
808 if (config != NULL) {
809 nvlist_t *nvroot;
810
811 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
812 &nvroot) == 0);
813 if ((current = get_replication(nvroot, B_FALSE)) == NULL)
814 return (0);
815 }
816 /*
817 * for spares there may be no children, and therefore no
818 * replication level to check
819 */
820 if ((nvlist_lookup_nvlist_array(newroot, ZPOOL_CONFIG_CHILDREN,
821 &child, &children) != 0) || (children == 0)) {
822 free(current);
823 return (0);
824 }
825
826 /*
827 * If all we have is logs then there's no replication level to check.
828 */
829 if (num_logs(newroot) == children) {
830 free(current);
831 return (0);
832 }
833
834 /*
835 * Get the replication level of the new vdev spec, reporting any
836 * inconsistencies found.
837 */
838 if ((new = get_replication(newroot, B_TRUE)) == NULL) {
839 free(current);
840 return (-1);
841 }
842
843 /*
844 * Check to see if the new vdev spec matches the replication level of
845 * the current pool.
846 */
847 ret = 0;
848 if (current != NULL) {
849 if (strcmp(current->zprl_type, new->zprl_type) != 0) {
850 vdev_error(gettext(
851 "mismatched replication level: pool uses %s "
852 "and new vdev is %s\n"),
853 current->zprl_type, new->zprl_type);
854 ret = -1;
855 } else if (current->zprl_parity != new->zprl_parity) {
856 vdev_error(gettext(
857 "mismatched replication level: pool uses %llu "
858 "device parity and new vdev uses %llu\n"),
859 current->zprl_parity, new->zprl_parity);
860 ret = -1;
861 } else if (current->zprl_children != new->zprl_children) {
862 vdev_error(gettext(
863 "mismatched replication level: pool uses %llu-way "
864 "%s and new vdev uses %llu-way %s\n"),
865 current->zprl_children, current->zprl_type,
866 new->zprl_children, new->zprl_type);
867 ret = -1;
868 }
869 }
870
871 free(new);
872 if (current != NULL)
873 free(current);
874
875 return (ret);
876 }
877
878 /*
879 * Go through and find any whole disks in the vdev specification, labelling them
880 * as appropriate. When constructing the vdev spec, we were unable to open this
881 * device in order to provide a devid. Now that we have labelled the disk and
882 * know the pool slice is valid, we can construct the devid now.
883 *
884 * If the disk was already labeled with an EFI label, we will have gotten the
885 * devid already (because we were able to open the whole disk). Otherwise, we
886 * need to get the devid after we label the disk.
887 */
static int
make_disks(zpool_handle_t *zhp, nvlist_t *nv, zpool_boot_label_t boot_type,
    uint64_t boot_size)
{
	nvlist_t **child;
	uint_t c, children;
	char *type, *path, *diskname;
	char buf[MAXPATHLEN];
	uint64_t wholedisk;
	int fd;
	int ret;
	int slice;	/* slice number chosen by zpool_label_disk() */
	ddi_devid_t devid;
	char *minor = NULL, *devid_str = NULL;

	verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);

	/* Leaf vdev (no children): label it if it is a whole disk. */
	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0) {

		if (strcmp(type, VDEV_TYPE_DISK) != 0)
			return (0);

		/*
		 * We have a disk device.  Get the path to the device
		 * and see if it's a whole disk by appending the backup
		 * slice and stat()ing the device.
		 */
		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);

		diskname = strrchr(path, '/');
		assert(diskname != NULL);
		diskname++;

		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
		    &wholedisk) != 0 || !wholedisk) {
			/*
			 * This is not whole disk, return error if
			 * boot partition creation was requested
			 */
			if (boot_type == ZPOOL_CREATE_BOOT_LABEL) {
				(void) fprintf(stderr,
				    gettext("creating boot partition is only "
				    "supported on whole disk vdevs: %s\n"),
				    diskname);
				return (-1);
			}
			return (0);
		}

		/* Write the EFI label; 'slice' receives the pool slice. */
		ret = zpool_label_disk(g_zfs, zhp, diskname, boot_type,
		    boot_size, &slice);
		if (ret == -1)
			return (ret);

		/*
		 * Fill in the devid, now that we've labeled the disk.
		 */
		(void) snprintf(buf, sizeof (buf), "%ss%d", path, slice);
		if ((fd = open(buf, O_RDONLY)) < 0) {
			(void) fprintf(stderr,
			    gettext("cannot open '%s': %s\n"),
			    buf, strerror(errno));
			return (-1);
		}

		/* devid lookup is best-effort: failures are silently ignored */
		if (devid_get(fd, &devid) == 0) {
			if (devid_get_minor_name(fd, &minor) == 0 &&
			    (devid_str = devid_str_encode(devid, minor)) !=
			    NULL) {
				verify(nvlist_add_string(nv,
				    ZPOOL_CONFIG_DEVID, devid_str) == 0);
			}
			if (devid_str != NULL)
				devid_str_free(devid_str);
			if (minor != NULL)
				devid_str_free(minor);
			devid_free(devid);
		}

		/*
		 * Update the path to refer to the pool slice.  The presence of
		 * the 'whole_disk' field indicates to the CLI that we should
		 * chop off the slice number when displaying the device in
		 * future output.
		 */
		verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, buf) == 0);

		(void) close(fd);

		return (0);
	}

	/* illumos kernel does not support booting from multi-vdev pools. */
	if ((boot_type == ZPOOL_CREATE_BOOT_LABEL)) {
		if ((strcmp(type, VDEV_TYPE_ROOT) == 0) && children > 1) {
			(void) fprintf(stderr, gettext("boot pool "
			    "can not have more than one vdev\n"));
			return (-1);
		}
	}

	/* Interior vdev: recurse into children, spares, and cache devices. */
	for (c = 0; c < children; c++) {
		ret = make_disks(zhp, child[c], boot_type, boot_size);
		if (ret != 0)
			return (ret);
	}

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
	    &child, &children) == 0)
		for (c = 0; c < children; c++) {
			ret = make_disks(zhp, child[c], boot_type, boot_size);
			if (ret != 0)
				return (ret);
		}

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
	    &child, &children) == 0)
		for (c = 0; c < children; c++) {
			ret = make_disks(zhp, child[c], boot_type, boot_size);
			if (ret != 0)
				return (ret);
		}

	return (0);
}
1014
1015 /*
1016 * Determine if the given path is a hot spare within the given configuration.
1017 */
1018 static boolean_t
1019 is_spare(nvlist_t *config, const char *path)
1020 {
1021 int fd;
1022 pool_state_t state;
1023 char *name = NULL;
1024 nvlist_t *label;
1025 uint64_t guid, spareguid;
1026 nvlist_t *nvroot;
1027 nvlist_t **spares;
1028 uint_t i, nspares;
1029 boolean_t inuse;
1030
1031 if ((fd = open(path, O_RDONLY)) < 0)
1032 return (B_FALSE);
1033
1034 if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 ||
1035 !inuse ||
1036 state != POOL_STATE_SPARE ||
1037 zpool_read_label(fd, &label) != 0) {
1038 free(name);
1039 (void) close(fd);
1040 return (B_FALSE);
1041 }
1042 free(name);
1043 (void) close(fd);
1044
1045 verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0);
1046 nvlist_free(label);
1047
1048 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
1049 &nvroot) == 0);
1050 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
1051 &spares, &nspares) == 0) {
1052 for (i = 0; i < nspares; i++) {
1053 verify(nvlist_lookup_uint64(spares[i],
1054 ZPOOL_CONFIG_GUID, &spareguid) == 0);
1055 if (spareguid == guid)
1056 return (B_TRUE);
1057 }
1058 }
1059
1060 return (B_FALSE);
1061 }
1062
1063 /*
1064 * Go through and find any devices that are in use. We rely on libdiskmgt for
1065 * the majority of this task.
1066 */
1067 static boolean_t
1068 is_device_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force,
1069 boolean_t replacing, boolean_t isspare)
1070 {
1071 nvlist_t **child;
1072 uint_t c, children;
1073 char *type, *path;
1074 int ret = 0;
1075 char buf[MAXPATHLEN];
1076 uint64_t wholedisk;
1077 boolean_t anyinuse = B_FALSE;
1078
1079 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
1080
1081 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1082 &child, &children) != 0) {
1083
1084 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
1085
1086 /*
1087 * As a generic check, we look to see if this is a replace of a
1088 * hot spare within the same pool. If so, we allow it
1089 * regardless of what libdiskmgt or zpool_in_use() says.
1090 */
1091 if (replacing) {
1092 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
1093 &wholedisk) == 0 && wholedisk)
1094 (void) snprintf(buf, sizeof (buf), "%ss0",
1095 path);
1096 else
1097 (void) strlcpy(buf, path, sizeof (buf));
1098
1099 if (is_spare(config, buf))
1100 return (B_FALSE);
1101 }
1102
1103 if (strcmp(type, VDEV_TYPE_DISK) == 0)
1104 ret = check_device(path, force, isspare);
1105 else if (strcmp(type, VDEV_TYPE_FILE) == 0)
1106 ret = check_file(path, force, isspare);
1107
1108 return (ret != 0);
1109 }
1110
1111 for (c = 0; c < children; c++)
1112 if (is_device_in_use(config, child[c], force, replacing,
1113 B_FALSE))
1114 anyinuse = B_TRUE;
1115
1116 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
1117 &child, &children) == 0)
1118 for (c = 0; c < children; c++)
1119 if (is_device_in_use(config, child[c], force, replacing,
1120 B_TRUE))
1121 anyinuse = B_TRUE;
1122
1123 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
1124 &child, &children) == 0)
1125 for (c = 0; c < children; c++)
1126 if (is_device_in_use(config, child[c], force, replacing,
1127 B_FALSE))
1128 anyinuse = B_TRUE;
1129
1130 return (anyinuse);
1131 }
1132
1133 static const char *
1134 is_grouping(const char *type, int *mindev, int *maxdev)
1135 {
1136 if (strncmp(type, "raidz", 5) == 0) {
1137 const char *p = type + 5;
1138 char *end;
1139 long nparity;
1140
1141 if (*p == '\0') {
1142 nparity = 1;
1143 } else if (*p == '0') {
1144 return (NULL); /* no zero prefixes allowed */
1145 } else {
1146 errno = 0;
1147 nparity = strtol(p, &end, 10);
1148 if (errno != 0 || nparity < 1 || nparity >= 255 ||
1149 *end != '\0')
1150 return (NULL);
1151 }
1152
1153 if (mindev != NULL)
1154 *mindev = nparity + 1;
1155 if (maxdev != NULL)
1156 *maxdev = 255;
1157 return (VDEV_TYPE_RAIDZ);
1158 }
1159
1160 if (maxdev != NULL)
1161 *maxdev = INT_MAX;
1162
1163 if (strcmp(type, "mirror") == 0) {
1164 if (mindev != NULL)
1165 *mindev = 2;
1166 return (VDEV_TYPE_MIRROR);
1167 }
1168
1169 if (strcmp(type, "spare") == 0) {
1170 if (mindev != NULL)
1171 *mindev = 1;
1172 return (VDEV_TYPE_SPARE);
1173 }
1174
1175 if (strcmp(type, "log") == 0) {
1176 if (mindev != NULL)
1177 *mindev = 1;
1178 return (VDEV_TYPE_LOG);
1179 }
1180
1181 if (strcmp(type, "cache") == 0) {
1182 if (mindev != NULL)
1183 *mindev = 1;
1184 return (VDEV_TYPE_L2CACHE);
1185 }
1186
1187 if (strcmp(type, "special") == 0) {
1188 if (mindev != NULL)
1189 *mindev = 1;
1190 return (VDEV_TYPE_SPECIAL);
1191 }
1192
1193 return (NULL);
1194 }
1195
1196 /*
1197 * Construct a syntactically valid vdev specification,
1198 * and ensure that all devices and files exist and can be opened.
1199 * Note: we don't bother freeing anything in the error paths
1200 * because the program is just going to exit anyway.
1201 */
1202 nvlist_t *
1203 construct_spec(int argc, char **argv)
1204 {
1205 nvlist_t *nvroot, *nv, **top, **spares, **l2cache;
1206 int t, toplevels, mindev, maxdev, nspares, nlogs, nl2cache;
1207 int nspecial = 0;
1208 const char *type;
1209 boolean_t is_log, seen_logs;
1210 boolean_t is_special, seen_special;
1211
1212 top = NULL;
1213 toplevels = 0;
1214 spares = NULL;
1215 l2cache = NULL;
1216 nspares = 0;
1217 nlogs = 0;
1218 nl2cache = 0;
1219 is_log = B_FALSE;
1220 seen_logs = B_FALSE;
1221 is_special = B_FALSE;
1222 seen_special = B_FALSE;
1223
1224 while (argc > 0) {
1225 nv = NULL;
1226
1227 /*
1228 * If it's a mirror or raidz, the subsequent arguments are
1229 * its leaves -- until we encounter the next mirror or raidz.
1230 */
1231 if ((type = is_grouping(argv[0], &mindev, &maxdev)) != NULL) {
1232 nvlist_t **child = NULL;
1233 int c, children = 0;
1234
1235 if (strcmp(type, VDEV_TYPE_SPARE) == 0) {
1236 if (spares != NULL) {
1237 (void) fprintf(stderr,
1238 gettext("invalid vdev "
1239 "specification: 'spare' can be "
1240 "specified only once\n"));
1241 return (NULL);
1242 }
1243 is_log = B_FALSE;
1244 is_special = B_FALSE;
1245 }
1246
1247 if (strcmp(type, VDEV_TYPE_LOG) == 0) {
1248 if (seen_logs) {
1249 (void) fprintf(stderr,
1250 gettext("invalid vdev "
1251 "specification: 'log' can be "
1252 "specified only once\n"));
1253 return (NULL);
1254 }
1255 seen_logs = B_TRUE;
1256 is_log = B_TRUE;
1257 is_special = B_FALSE;
1258 argc--;
1259 argv++;
1260 /*
1261 * A log is not a real grouping device.
1262 * We just set is_log and continue.
1263 */
1264 continue;
1265 }
1266
1267 if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) {
1268 if (l2cache != NULL) {
1269 (void) fprintf(stderr,
1270 gettext("invalid vdev "
1271 "specification: 'cache' can be "
1272 "specified only once\n"));
1273 return (NULL);
1274 }
1275 is_log = B_FALSE;
1276 is_special = B_FALSE;
1277 }
1278
1279 if (strcmp(type, VDEV_TYPE_SPECIAL) == 0) {
1280 if (seen_special) {
1281 (void) fprintf(stderr,
1282 gettext("invalid vdev "
1283 "specification: 'special' can be "
1284 "specified only once\n"));
1285 return (NULL);
1286 }
1287 seen_special = B_TRUE;
1288 is_log = B_FALSE;
1289 is_special = B_TRUE;
1290 argc--;
1291 argv++;
1292 /*
1293 * A special is not a real grouping device.
1294 * We just set is_special and continue.
1295 */
1296 continue;
1297 }
1298
1299 if (is_log) {
1300 if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
1301 (void) fprintf(stderr,
1302 gettext("invalid vdev "
1303 "specification: unsupported 'log' "
1304 "device: %s\n"), type);
1305 return (NULL);
1306 }
1307 nlogs++;
1308 }
1309
1310 if (is_special) {
1311 if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
1312 (void) fprintf(stderr,
1313 gettext("invalid vdev "
1314 "specification: unsupported "
1315 "'special' device: %s\n"), type);
1316 return (NULL);
1317 }
1318 nspecial++;
1319 }
1320
1321 for (c = 1; c < argc; c++) {
1322 if (is_grouping(argv[c], NULL, NULL) != NULL)
1323 break;
1324 children++;
1325 child = realloc(child,
1326 children * sizeof (nvlist_t *));
1327 if (child == NULL)
1328 zpool_no_memory();
1329 if ((nv = make_leaf_vdev(argv[c],
1330 (uint64_t)B_FALSE,
1331 (uint64_t)B_FALSE)) == NULL)
1332 return (NULL);
1333 child[children - 1] = nv;
1334 }
1335
1336 if (children < mindev) {
1337 (void) fprintf(stderr, gettext("invalid vdev "
1338 "specification: %s requires at least %d "
1339 "devices\n"), argv[0], mindev);
1340 return (NULL);
1341 }
1342
1343 if (children > maxdev) {
1344 (void) fprintf(stderr, gettext("invalid vdev "
1345 "specification: %s supports no more than "
1346 "%d devices\n"), argv[0], maxdev);
1347 return (NULL);
1348 }
1349
1350 argc -= c;
1351 argv += c;
1352
1353 if (strcmp(type, VDEV_TYPE_SPARE) == 0) {
1354 spares = child;
1355 nspares = children;
1356 continue;
1357 } else if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) {
1358 l2cache = child;
1359 nl2cache = children;
1360 continue;
1361 } else {
1362 verify(nvlist_alloc(&nv, NV_UNIQUE_NAME,
1363 0) == 0);
1364 verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
1365 type) == 0);
1366 verify(nvlist_add_uint64(nv,
1367 ZPOOL_CONFIG_IS_LOG,
1368 (uint64_t)is_log) == 0);
1369 verify(nvlist_add_uint64(nv,
1370 ZPOOL_CONFIG_IS_SPECIAL,
1371 (uint64_t)is_special) == 0);
1372 if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
1373 verify(nvlist_add_uint64(nv,
1374 ZPOOL_CONFIG_NPARITY,
1375 mindev - 1) == 0);
1376 }
1377 verify(nvlist_add_nvlist_array(nv,
1378 ZPOOL_CONFIG_CHILDREN, child,
1379 children) == 0);
1380
1381 for (c = 0; c < children; c++)
1382 nvlist_free(child[c]);
1383 free(child);
1384 }
1385 } else {
1386 /*
1387 * We have a device. Pass off to make_leaf_vdev() to
1388 * construct the appropriate nvlist describing the vdev.
1389 */
1390 if ((nv = make_leaf_vdev(argv[0], (uint64_t)is_log,
1391 (uint64_t)is_special)) == NULL)
1392 return (NULL);
1393 if (is_log)
1394 nlogs++;
1395 if (is_special)
1396 nspecial++;
1397 argc--;
1398 argv++;
1399 }
1400
1401 toplevels++;
1402 top = realloc(top, toplevels * sizeof (nvlist_t *));
1403 if (top == NULL)
1404 zpool_no_memory();
1405 top[toplevels - 1] = nv;
1406 }
1407
1408 if (toplevels == 0 && nspares == 0 && nl2cache == 0) {
1409 (void) fprintf(stderr, gettext("invalid vdev "
1410 "specification: at least one toplevel vdev must be "
1411 "specified\n"));
1412 return (NULL);
1413 }
1414
1415 if (seen_special && nspecial == 0) {
1416 (void) fprintf(stderr, gettext("invalid vdev specification: "
1417 "special requires at least 1 device\n"));
1418 return (NULL);
1419 }
1420
1421 if (seen_logs && nlogs == 0) {
1422 (void) fprintf(stderr, gettext("invalid vdev specification: "
1423 "log requires at least 1 device\n"));
1424 return (NULL);
1425 }
1426
1427 /*
1428 * Finally, create nvroot and add all top-level vdevs to it.
1429 */
1430 verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0);
1431 verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
1432 VDEV_TYPE_ROOT) == 0);
1433 verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
1434 top, toplevels) == 0);
1435 if (nspares != 0)
1436 verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
1437 spares, nspares) == 0);
1438 if (nl2cache != 0)
1439 verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
1440 l2cache, nl2cache) == 0);
1441
1442 for (t = 0; t < toplevels; t++)
1443 nvlist_free(top[t]);
1444 for (t = 0; t < nspares; t++)
1445 nvlist_free(spares[t]);
1446 for (t = 0; t < nl2cache; t++)
1447 nvlist_free(l2cache[t]);
1448 if (spares)
1449 free(spares);
1450 if (l2cache)
1451 free(l2cache);
1452 free(top);
1453
1454 return (nvroot);
1455 }
1456
1457 nvlist_t *
1458 split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props,
1459 splitflags_t flags, int argc, char **argv)
1460 {
1461 nvlist_t *newroot = NULL, **child;
1462 uint_t c, children;
1463 zpool_boot_label_t boot_type;
1464
1465 if (argc > 0) {
1466 if ((newroot = construct_spec(argc, argv)) == NULL) {
1467 (void) fprintf(stderr, gettext("Unable to build a "
1468 "pool from the specified devices\n"));
1469 return (NULL);
1470 }
1471
1472 if (zpool_is_bootable(zhp))
1473 boot_type = ZPOOL_COPY_BOOT_LABEL;
1474 else
1475 boot_type = ZPOOL_NO_BOOT_LABEL;
1476
1477 if (!flags.dryrun &&
1478 make_disks(zhp, newroot, boot_type, 0) != 0) {
1479 nvlist_free(newroot);
1480 return (NULL);
1481 }
1482
1483 /* avoid any tricks in the spec */
1484 verify(nvlist_lookup_nvlist_array(newroot,
1485 ZPOOL_CONFIG_CHILDREN, &child, &children) == 0);
1486 for (c = 0; c < children; c++) {
1487 char *path;
1488 const char *type;
1489 int min, max;
1490
1491 verify(nvlist_lookup_string(child[c],
1492 ZPOOL_CONFIG_PATH, &path) == 0);
1493 if ((type = is_grouping(path, &min, &max)) != NULL) {
1494 (void) fprintf(stderr, gettext("Cannot use "
1495 "'%s' as a device for splitting\n"), type);
1496 nvlist_free(newroot);
1497 return (NULL);
1498 }
1499 }
1500 }
1501
1502 if (zpool_vdev_split(zhp, newname, &newroot, props, flags) != 0) {
1503 nvlist_free(newroot);
1504 return (NULL);
1505 }
1506
1507 return (newroot);
1508 }
1509
1510 /*
1511 * Get and validate the contents of the given vdev specification. This ensures
1512 * that the nvlist returned is well-formed, that all the devices exist, and that
1513 * they are not currently in use by any other known consumer. The 'poolconfig'
 * parameter is the current configuration of the pool when adding devices to an
 * existing pool, and is used to perform additional checks, such as changing the
1516 * replication level of the pool. It can be 'NULL' to indicate that this is a
1517 * new pool. The 'force' flag controls whether devices should be forcefully
1518 * added, even if they appear in use.
1519 */
1520 nvlist_t *
1521 make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
1522 boolean_t replacing, boolean_t dryrun, zpool_boot_label_t boot_type,
1523 uint64_t boot_size, int argc, char **argv)
1524 {
1525 nvlist_t *newroot;
1526 nvlist_t *poolconfig = NULL;
1527 is_force = force;
1528
1529 /*
1530 * Construct the vdev specification. If this is successful, we know
1531 * that we have a valid specification, and that all devices can be
1532 * opened.
1533 */
1534 if ((newroot = construct_spec(argc, argv)) == NULL)
1535 return (NULL);
1536
1537 if (zhp && ((poolconfig = zpool_get_config(zhp, NULL)) == NULL))
1538 return (NULL);
1539
1540 /*
1541 * Validate each device to make sure that its not shared with another
1542 * subsystem. We do this even if 'force' is set, because there are some
1543 * uses (such as a dedicated dump device) that even '-f' cannot
1544 * override.
1545 */
1546 if (is_device_in_use(poolconfig, newroot, force, replacing, B_FALSE)) {
1547 nvlist_free(newroot);
1548 return (NULL);
1549 }
1550
1551 /*
1552 * Check the replication level of the given vdevs and report any errors
1553 * found. We include the existing pool spec, if any, as we need to
1554 * catch changes against the existing replication level.
1555 */
1556 if (check_rep && check_replication(poolconfig, newroot) != 0) {
1557 nvlist_free(newroot);
1558 return (NULL);
1559 }
1560
1561 /*
1562 * Run through the vdev specification and label any whole disks found.
1563 */
1564 if (!dryrun && make_disks(zhp, newroot, boot_type, boot_size) != 0) {
1565 nvlist_free(newroot);
1566 return (NULL);
1567 }
1568
1569 return (newroot);
1570 }