1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
  25  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>.
  26  */
  27 
  28 /*
  29  * Functions to convert between a list of vdevs and an nvlist representing the
  30  * configuration.  Each entry in the list can be one of:
  31  *
  32  *      Device vdevs
  33  *              disk=(path=..., devid=...)
  34  *              file=(path=...)
  35  *
  36  *      Group vdevs
  37  *              raidz[1|2]=(...)
  38  *              mirror=(...)
  39  *
  40  *      Hot spares
  41  *
  42  * While the underlying implementation supports it, group vdevs cannot contain
  43  * other group vdevs.  All userland verification of devices is contained within
  44  * this file.  If successful, the nvlist returned can be passed directly to the
  45  * kernel; we've done as much verification as possible in userland.
  46  *
  47  * Hot spares are a special case, and passed down as an array of disk vdevs, at
  48  * the same level as the root of the vdev tree.
  49  *
  50  * The only function exported by this file is 'make_root_vdev'.  The
  51  * function performs several passes:
  52  *
  53  *      1. Construct the vdev specification.  Performs syntax validation and
  54  *         makes sure each device is valid.
 *      2. Check for devices in use.  Using libdiskmgt, makes sure that none
 *         of the devices is already in use.  Some can be overridden using the
 *         'force' flag, others cannot.
 *      3. Check for replication errors if the 'force' flag is not specified.
 *         Validates that the replication level is consistent across the
 *         entire pool.
  61  *      4. Call libzfs to label any whole disks with an EFI label.
  62  */
  63 
  64 #include <assert.h>
  65 #include <devid.h>
  66 #include <errno.h>
  67 #include <fcntl.h>
  68 #include <libdiskmgt.h>
  69 #include <libintl.h>
  70 #include <libnvpair.h>
  71 #include <limits.h>
  72 #include <stdio.h>
  73 #include <string.h>
  74 #include <unistd.h>
  75 #include <sys/efi_partition.h>
  76 #include <sys/stat.h>
  77 #include <sys/vtoc.h>
  78 #include <sys/mntent.h>
  79 
  80 #include "zpool_util.h"
  81 
  82 #define BACKUP_SLICE    "s2"
  83 
  84 /*
  85  * For any given vdev specification, we can have multiple errors.  The
  86  * vdev_error() function keeps track of whether we have seen an error yet, and
  87  * prints out a header if its the first error we've seen.
  88  */
  89 boolean_t error_seen;
  90 boolean_t is_force;
  91 
  92 /*PRINTFLIKE1*/
  93 static void
  94 vdev_error(const char *fmt, ...)
  95 {
  96         va_list ap;
  97 
  98         if (!error_seen) {
  99                 (void) fprintf(stderr, gettext("invalid vdev specification\n"));
 100                 if (!is_force)
 101                         (void) fprintf(stderr, gettext("use '-f' to override "
 102                             "the following errors:\n"));
 103                 else
 104                         (void) fprintf(stderr, gettext("the following errors "
 105                             "must be manually repaired:\n"));
 106                 error_seen = B_TRUE;
 107         }
 108 
 109         va_start(ap, fmt);
 110         (void) vfprintf(stderr, fmt, ap);
 111         va_end(ap);
 112 }
 113 
 114 static void
 115 libdiskmgt_error(int error)
 116 {
 117         /*
 118          * ENXIO/ENODEV is a valid error message if the device doesn't live in
 119          * /dev/dsk.  Don't bother printing an error message in this case.
 120          */
 121         if (error == ENXIO || error == ENODEV)
 122                 return;
 123 
 124         (void) fprintf(stderr, gettext("warning: device in use checking "
 125             "failed: %s\n"), strerror(error));
 126 }
 127 
 128 /*
 129  * Validate a device, passing the bulk of the work off to libdiskmgt.
 130  */
 131 static int
 132 check_slice(const char *path, int force, boolean_t wholedisk, boolean_t isspare)
 133 {
 134         char *msg;
 135         int error = 0;
 136         dm_who_type_t who;
 137 
 138         if (force)
 139                 who = DM_WHO_ZPOOL_FORCE;
 140         else if (isspare)
 141                 who = DM_WHO_ZPOOL_SPARE;
 142         else
 143                 who = DM_WHO_ZPOOL;
 144 
 145         if (dm_inuse((char *)path, &msg, who, &error) || error) {
 146                 if (error != 0) {
 147                         libdiskmgt_error(error);
 148                         return (0);
 149                 } else {
 150                         vdev_error("%s", msg);
 151                         free(msg);
 152                         return (-1);
 153                 }
 154         }
 155 
 156         /*
 157          * If we're given a whole disk, ignore overlapping slices since we're
 158          * about to label it anyway.
 159          */
 160         error = 0;
 161         if (!wholedisk && !force &&
 162             (dm_isoverlapping((char *)path, &msg, &error) || error)) {
 163                 if (error == 0) {
 164                         /* dm_isoverlapping returned -1 */
 165                         vdev_error(gettext("%s overlaps with %s\n"), path, msg);
 166                         free(msg);
 167                         return (-1);
 168                 } else if (error != ENODEV) {
 169                         /* libdiskmgt's devcache only handles physical drives */
 170                         libdiskmgt_error(error);
 171                         return (0);
 172                 }
 173         }
 174 
 175         return (0);
 176 }
 177 
 178 
 179 /*
 180  * Validate a whole disk.  Iterate over all slices on the disk and make sure
 181  * that none is in use by calling check_slice().
 182  */
 183 static int
 184 check_disk(const char *name, dm_descriptor_t disk, int force, int isspare)
 185 {
 186         dm_descriptor_t *drive, *media, *slice;
 187         int err = 0;
 188         int i;
 189         int ret;
 190 
 191         /*
 192          * Get the drive associated with this disk.  This should never fail,
 193          * because we already have an alias handle open for the device.
 194          */
 195         if ((drive = dm_get_associated_descriptors(disk, DM_DRIVE,
 196             &err)) == NULL || *drive == NULL) {
 197                 if (err)
 198                         libdiskmgt_error(err);
 199                 return (0);
 200         }
 201 
 202         if ((media = dm_get_associated_descriptors(*drive, DM_MEDIA,
 203             &err)) == NULL) {
 204                 dm_free_descriptors(drive);
 205                 if (err)
 206                         libdiskmgt_error(err);
 207                 return (0);
 208         }
 209 
 210         dm_free_descriptors(drive);
 211 
 212         /*
 213          * It is possible that the user has specified a removable media drive,
 214          * and the media is not present.
 215          */
 216         if (*media == NULL) {
 217                 dm_free_descriptors(media);
 218                 vdev_error(gettext("'%s' has no media in drive\n"), name);
 219                 return (-1);
 220         }
 221 
 222         if ((slice = dm_get_associated_descriptors(*media, DM_SLICE,
 223             &err)) == NULL) {
 224                 dm_free_descriptors(media);
 225                 if (err)
 226                         libdiskmgt_error(err);
 227                 return (0);
 228         }
 229 
 230         dm_free_descriptors(media);
 231 
 232         ret = 0;
 233 
 234         /*
 235          * Iterate over all slices and report any errors.  We don't care about
 236          * overlapping slices because we are using the whole disk.
 237          */
 238         for (i = 0; slice[i] != NULL; i++) {
 239                 char *name = dm_get_name(slice[i], &err);
 240 
 241                 if (check_slice(name, force, B_TRUE, isspare) != 0)
 242                         ret = -1;
 243 
 244                 dm_free_name(name);
 245         }
 246 
 247         dm_free_descriptors(slice);
 248         return (ret);
 249 }
 250 
 251 /*
 252  * Validate a device.
 253  */
 254 static int
 255 check_device(const char *path, boolean_t force, boolean_t isspare)
 256 {
 257         dm_descriptor_t desc;
 258         int err;
 259         char *dev;
 260 
 261         /*
 262          * For whole disks, libdiskmgt does not include the leading dev path.
 263          */
 264         dev = strrchr(path, '/');
 265         assert(dev != NULL);
 266         dev++;
 267         if ((desc = dm_get_descriptor_by_name(DM_ALIAS, dev, &err)) != NULL) {
 268                 err = check_disk(path, desc, force, isspare);
 269                 dm_free_descriptor(desc);
 270                 return (err);
 271         }
 272 
 273         return (check_slice(path, force, B_FALSE, isspare));
 274 }
 275 
 276 /*
 277  * Check that a file is valid.  All we can do in this case is check that it's
 278  * not in use by another pool, and not in use by swap.
 279  */
 280 static int
 281 check_file(const char *file, boolean_t force, boolean_t isspare)
 282 {
 283         char  *name;
 284         int fd;
 285         int ret = 0;
 286         int err;
 287         pool_state_t state;
 288         boolean_t inuse;
 289 
 290         if (dm_inuse_swap(file, &err)) {
 291                 if (err)
 292                         libdiskmgt_error(err);
 293                 else
 294                         vdev_error(gettext("%s is currently used by swap. "
 295                             "Please see swap(1M).\n"), file);
 296                 return (-1);
 297         }
 298 
 299         if ((fd = open(file, O_RDONLY)) < 0)
 300                 return (0);
 301 
 302         if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) == 0 && inuse) {
 303                 const char *desc;
 304 
 305                 switch (state) {
 306                 case POOL_STATE_ACTIVE:
 307                         desc = gettext("active");
 308                         break;
 309 
 310                 case POOL_STATE_EXPORTED:
 311                         desc = gettext("exported");
 312                         break;
 313 
 314                 case POOL_STATE_POTENTIALLY_ACTIVE:
 315                         desc = gettext("potentially active");
 316                         break;
 317 
 318                 default:
 319                         desc = gettext("unknown");
 320                         break;
 321                 }
 322 
 323                 /*
 324                  * Allow hot spares to be shared between pools.
 325                  */
 326                 if (state == POOL_STATE_SPARE && isspare)
 327                         return (0);
 328 
 329                 if (state == POOL_STATE_ACTIVE ||
 330                     state == POOL_STATE_SPARE || !force) {
 331                         switch (state) {
 332                         case POOL_STATE_SPARE:
 333                                 vdev_error(gettext("%s is reserved as a hot "
 334                                     "spare for pool %s\n"), file, name);
 335                                 break;
 336                         default:
 337                                 vdev_error(gettext("%s is part of %s pool "
 338                                     "'%s'\n"), file, desc, name);
 339                                 break;
 340                         }
 341                         ret = -1;
 342                 }
 343 
 344                 free(name);
 345         }
 346 
 347         (void) close(fd);
 348         return (ret);
 349 }
 350 
 351 
 352 /*
 353  * By "whole disk" we mean an entire physical disk (something we can
 354  * label, toggle the write cache on, etc.) as opposed to the full
 355  * capacity of a pseudo-device such as lofi or did.  We act as if we
 356  * are labeling the disk, which should be a pretty good test of whether
 357  * it's a viable device or not.  Returns B_TRUE if it is and B_FALSE if
 358  * it isn't.
 359  */
 360 static boolean_t
 361 is_whole_disk(const char *arg)
 362 {
 363         struct dk_gpt *label;
 364         int     fd;
 365         char    path[MAXPATHLEN];
 366 
 367         (void) snprintf(path, sizeof (path), "%s%s%s",
 368             ZFS_RDISK_ROOT, strrchr(arg, '/'), BACKUP_SLICE);
 369         if ((fd = open(path, O_RDWR | O_NDELAY)) < 0)
 370                 return (B_FALSE);
 371         if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) {
 372                 (void) close(fd);
 373                 return (B_FALSE);
 374         }
 375         efi_free(label);
 376         (void) close(fd);
 377         return (B_TRUE);
 378 }
 379 
 380 /*
 381  * Create a leaf vdev.  Determine if this is a file or a device.  If it's a
 382  * device, fill in the device id to make a complete nvlist.  Valid forms for a
 383  * leaf vdev are:
 384  *
 385  *      /dev/dsk/xxx    Complete disk path
 386  *      /xxx            Full path to file
 387  *      xxx             Shorthand for /dev/dsk/xxx
 388  */
 389 static nvlist_t *
 390 make_leaf_vdev(const char *arg, uint64_t is_log)
 391 {
 392         char path[MAXPATHLEN];
 393         struct stat64 statbuf;
 394         nvlist_t *vdev = NULL;
 395         char *type = NULL;
 396         boolean_t wholedisk = B_FALSE;
 397 
 398         /*
 399          * Determine what type of vdev this is, and put the full path into
 400          * 'path'.  We detect whether this is a device of file afterwards by
 401          * checking the st_mode of the file.
 402          */
 403         if (arg[0] == '/') {
 404                 /*
 405                  * Complete device or file path.  Exact type is determined by
 406                  * examining the file descriptor afterwards.
 407                  */
 408                 wholedisk = is_whole_disk(arg);
 409                 if (!wholedisk && (stat64(arg, &statbuf) != 0)) {
 410                         (void) fprintf(stderr,
 411                             gettext("cannot open '%s': %s\n"),
 412                             arg, strerror(errno));
 413                         return (NULL);
 414                 }
 415 
 416                 (void) strlcpy(path, arg, sizeof (path));
 417         } else {
 418                 /*
 419                  * This may be a short path for a device, or it could be total
 420                  * gibberish.  Check to see if it's a known device in
 421                  * /dev/dsk/.  As part of this check, see if we've been given a
 422                  * an entire disk (minus the slice number).
 423                  */
 424                 (void) snprintf(path, sizeof (path), "%s/%s", ZFS_DISK_ROOT,
 425                     arg);
 426                 wholedisk = is_whole_disk(path);
 427                 if (!wholedisk && (stat64(path, &statbuf) != 0)) {
 428                         /*
 429                          * If we got ENOENT, then the user gave us
 430                          * gibberish, so try to direct them with a
 431                          * reasonable error message.  Otherwise,
 432                          * regurgitate strerror() since it's the best we
 433                          * can do.
 434                          */
 435                         if (errno == ENOENT) {
 436                                 (void) fprintf(stderr,
 437                                     gettext("cannot open '%s': no such "
 438                                     "device in %s\n"), arg, ZFS_DISK_ROOT);
 439                                 (void) fprintf(stderr,
 440                                     gettext("must be a full path or "
 441                                     "shorthand device name\n"));
 442                                 return (NULL);
 443                         } else {
 444                                 (void) fprintf(stderr,
 445                                     gettext("cannot open '%s': %s\n"),
 446                                     path, strerror(errno));
 447                                 return (NULL);
 448                         }
 449                 }
 450         }
 451 
 452         /*
 453          * Determine whether this is a device or a file.
 454          */
 455         if (wholedisk || S_ISBLK(statbuf.st_mode)) {
 456                 type = VDEV_TYPE_DISK;
 457         } else if (S_ISREG(statbuf.st_mode)) {
 458                 type = VDEV_TYPE_FILE;
 459         } else {
 460                 (void) fprintf(stderr, gettext("cannot use '%s': must be a "
 461                     "block device or regular file\n"), path);
 462                 return (NULL);
 463         }
 464 
 465         /*
 466          * Finally, we have the complete device or file, and we know that it is
 467          * acceptable to use.  Construct the nvlist to describe this vdev.  All
 468          * vdevs have a 'path' element, and devices also have a 'devid' element.
 469          */
 470         verify(nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) == 0);
 471         verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0);
 472         verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0);
 473         verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_LOG, is_log) == 0);
 474         if (strcmp(type, VDEV_TYPE_DISK) == 0)
 475                 verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
 476                     (uint64_t)wholedisk) == 0);
 477 
 478         /*
 479          * For a whole disk, defer getting its devid until after labeling it.
 480          */
 481         if (S_ISBLK(statbuf.st_mode) && !wholedisk) {
 482                 /*
 483                  * Get the devid for the device.
 484                  */
 485                 int fd;
 486                 ddi_devid_t devid;
 487                 char *minor = NULL, *devid_str = NULL;
 488 
 489                 if ((fd = open(path, O_RDONLY)) < 0) {
 490                         (void) fprintf(stderr, gettext("cannot open '%s': "
 491                             "%s\n"), path, strerror(errno));
 492                         nvlist_free(vdev);
 493                         return (NULL);
 494                 }
 495 
 496                 if (devid_get(fd, &devid) == 0) {
 497                         if (devid_get_minor_name(fd, &minor) == 0 &&
 498                             (devid_str = devid_str_encode(devid, minor)) !=
 499                             NULL) {
 500                                 verify(nvlist_add_string(vdev,
 501                                     ZPOOL_CONFIG_DEVID, devid_str) == 0);
 502                         }
 503                         if (devid_str != NULL)
 504                                 devid_str_free(devid_str);
 505                         if (minor != NULL)
 506                                 devid_str_free(minor);
 507                         devid_free(devid);
 508                 }
 509 
 510                 (void) close(fd);
 511         }
 512 
 513         return (vdev);
 514 }
 515 
 516 /*
 517  * Go through and verify the replication level of the pool is consistent.
 518  * Performs the following checks:
 519  *
 520  *      For the new spec, verifies that devices in mirrors and raidz are the
 521  *      same size.
 522  *
 523  *      If the current configuration already has inconsistent replication
 524  *      levels, ignore any other potential problems in the new spec.
 525  *
 526  *      Otherwise, make sure that the current spec (if there is one) and the new
 527  *      spec have consistent replication levels.
 528  */
 529 typedef struct replication_level {
 530         char *zprl_type;
 531         uint64_t zprl_children;
 532         uint64_t zprl_parity;
 533 } replication_level_t;
 534 
 535 #define ZPOOL_FUZZ      (16 * 1024 * 1024)
 536 
 537 /*
 538  * Given a list of toplevel vdevs, return the current replication level.  If
 539  * the config is inconsistent, then NULL is returned.  If 'fatal' is set, then
 540  * an error message will be displayed for each self-inconsistent vdev.
 541  */
 542 static replication_level_t *
 543 get_replication(nvlist_t *nvroot, boolean_t fatal)
 544 {
 545         nvlist_t **top;
 546         uint_t t, toplevels;
 547         nvlist_t **child;
 548         uint_t c, children;
 549         nvlist_t *nv;
 550         char *type;
 551         replication_level_t lastrep = {0};
 552         replication_level_t rep;
 553         replication_level_t *ret;
 554         boolean_t dontreport;
 555 
 556         ret = safe_malloc(sizeof (replication_level_t));
 557 
 558         verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
 559             &top, &toplevels) == 0);
 560 
 561         for (t = 0; t < toplevels; t++) {
 562                 uint64_t is_log = B_FALSE;
 563 
 564                 nv = top[t];
 565 
 566                 /*
 567                  * For separate logs we ignore the top level vdev replication
 568                  * constraints.
 569                  */
 570                 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log);
 571                 if (is_log)
 572                         continue;
 573 
 574                 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE,
 575                     &type) == 0);
 576                 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
 577                     &child, &children) != 0) {
 578                         /*
 579                          * This is a 'file' or 'disk' vdev.
 580                          */
 581                         rep.zprl_type = type;
 582                         rep.zprl_children = 1;
 583                         rep.zprl_parity = 0;
 584                 } else {
 585                         uint64_t vdev_size;
 586 
 587                         /*
 588                          * This is a mirror or RAID-Z vdev.  Go through and make
 589                          * sure the contents are all the same (files vs. disks),
 590                          * keeping track of the number of elements in the
 591                          * process.
 592                          *
 593                          * We also check that the size of each vdev (if it can
 594                          * be determined) is the same.
 595                          */
 596                         rep.zprl_type = type;
 597                         rep.zprl_children = 0;
 598 
 599                         if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
 600                                 verify(nvlist_lookup_uint64(nv,
 601                                     ZPOOL_CONFIG_NPARITY,
 602                                     &rep.zprl_parity) == 0);
 603                                 assert(rep.zprl_parity != 0);
 604                         } else {
 605                                 rep.zprl_parity = 0;
 606                         }
 607 
 608                         /*
 609                          * The 'dontreport' variable indicates that we've
 610                          * already reported an error for this spec, so don't
 611                          * bother doing it again.
 612                          */
 613                         type = NULL;
 614                         dontreport = 0;
 615                         vdev_size = -1ULL;
 616                         for (c = 0; c < children; c++) {
 617                                 nvlist_t *cnv = child[c];
 618                                 char *path;
 619                                 struct stat64 statbuf;
 620                                 uint64_t size = -1ULL;
 621                                 char *childtype;
 622                                 int fd, err;
 623 
 624                                 rep.zprl_children++;
 625 
 626                                 verify(nvlist_lookup_string(cnv,
 627                                     ZPOOL_CONFIG_TYPE, &childtype) == 0);
 628 
 629                                 /*
 630                                  * If this is a replacing or spare vdev, then
 631                                  * get the real first child of the vdev: do this
 632                                  * in a loop because replacing and spare vdevs
 633                                  * can be nested.
 634                                  */
 635                                 while (strcmp(childtype,
 636                                     VDEV_TYPE_REPLACING) == 0 ||
 637                                     strcmp(childtype, VDEV_TYPE_SPARE) == 0) {
 638                                         nvlist_t **rchild;
 639                                         uint_t rchildren;
 640 
 641                                         verify(nvlist_lookup_nvlist_array(cnv,
 642                                             ZPOOL_CONFIG_CHILDREN, &rchild,
 643                                             &rchildren) == 0);
 644                                         assert(rchildren == 2);
 645                                         cnv = rchild[0];
 646 
 647                                         verify(nvlist_lookup_string(cnv,
 648                                             ZPOOL_CONFIG_TYPE,
 649                                             &childtype) == 0);
 650                                 }
 651 
 652                                 verify(nvlist_lookup_string(cnv,
 653                                     ZPOOL_CONFIG_PATH, &path) == 0);
 654 
 655                                 /*
 656                                  * If we have a raidz/mirror that combines disks
 657                                  * with files, report it as an error.
 658                                  */
 659                                 if (!dontreport && type != NULL &&
 660                                     strcmp(type, childtype) != 0) {
 661                                         if (ret != NULL)
 662                                                 free(ret);
 663                                         ret = NULL;
 664                                         if (fatal)
 665                                                 vdev_error(gettext(
 666                                                     "mismatched replication "
 667                                                     "level: %s contains both "
 668                                                     "files and devices\n"),
 669                                                     rep.zprl_type);
 670                                         else
 671                                                 return (NULL);
 672                                         dontreport = B_TRUE;
 673                                 }
 674 
 675                                 /*
 676                                  * According to stat(2), the value of 'st_size'
 677                                  * is undefined for block devices and character
 678                                  * devices.  But there is no effective way to
 679                                  * determine the real size in userland.
 680                                  *
 681                                  * Instead, we'll take advantage of an
 682                                  * implementation detail of spec_size().  If the
 683                                  * device is currently open, then we (should)
 684                                  * return a valid size.
 685                                  *
 686                                  * If we still don't get a valid size (indicated
 687                                  * by a size of 0 or MAXOFFSET_T), then ignore
 688                                  * this device altogether.
 689                                  */
 690                                 if ((fd = open(path, O_RDONLY)) >= 0) {
 691                                         err = fstat64(fd, &statbuf);
 692                                         (void) close(fd);
 693                                 } else {
 694                                         err = stat64(path, &statbuf);
 695                                 }
 696 
 697                                 if (err != 0 ||
 698                                     statbuf.st_size == 0 ||
 699                                     statbuf.st_size == MAXOFFSET_T)
 700                                         continue;
 701 
 702                                 size = statbuf.st_size;
 703 
 704                                 /*
 705                                  * Also make sure that devices and
 706                                  * slices have a consistent size.  If
 707                                  * they differ by a significant amount
 708                                  * (~16MB) then report an error.
 709                                  */
 710                                 if (!dontreport &&
 711                                     (vdev_size != -1ULL &&
 712                                     (labs(size - vdev_size) >
 713                                     ZPOOL_FUZZ))) {
 714                                         if (ret != NULL)
 715                                                 free(ret);
 716                                         ret = NULL;
 717                                         if (fatal)
 718                                                 vdev_error(gettext(
 719                                                     "%s contains devices of "
 720                                                     "different sizes\n"),
 721                                                     rep.zprl_type);
 722                                         else
 723                                                 return (NULL);
 724                                         dontreport = B_TRUE;
 725                                 }
 726 
 727                                 type = childtype;
 728                                 vdev_size = size;
 729                         }
 730                 }
 731 
 732                 /*
 733                  * At this point, we have the replication of the last toplevel
 734                  * vdev in 'rep'.  Compare it to 'lastrep' to see if its
 735                  * different.
 736                  */
 737                 if (lastrep.zprl_type != NULL) {
 738                         if (strcmp(lastrep.zprl_type, rep.zprl_type) != 0) {
 739                                 if (ret != NULL)
 740                                         free(ret);
 741                                 ret = NULL;
 742                                 if (fatal)
 743                                         vdev_error(gettext(
 744                                             "mismatched replication level: "
 745                                             "both %s and %s vdevs are "
 746                                             "present\n"),
 747                                             lastrep.zprl_type, rep.zprl_type);
 748                                 else
 749                                         return (NULL);
 750                         } else if (lastrep.zprl_parity != rep.zprl_parity) {
 751                                 if (ret)
 752                                         free(ret);
 753                                 ret = NULL;
 754                                 if (fatal)
 755                                         vdev_error(gettext(
 756                                             "mismatched replication level: "
 757                                             "both %llu and %llu device parity "
 758                                             "%s vdevs are present\n"),
 759                                             lastrep.zprl_parity,
 760                                             rep.zprl_parity,
 761                                             rep.zprl_type);
 762                                 else
 763                                         return (NULL);
 764                         } else if (lastrep.zprl_children != rep.zprl_children) {
 765                                 if (ret)
 766                                         free(ret);
 767                                 ret = NULL;
 768                                 if (fatal)
 769                                         vdev_error(gettext(
 770                                             "mismatched replication level: "
 771                                             "both %llu-way and %llu-way %s "
 772                                             "vdevs are present\n"),
 773                                             lastrep.zprl_children,
 774                                             rep.zprl_children,
 775                                             rep.zprl_type);
 776                                 else
 777                                         return (NULL);
 778                         }
 779                 }
 780                 lastrep = rep;
 781         }
 782 
 783         if (ret != NULL)
 784                 *ret = rep;
 785 
 786         return (ret);
 787 }
 788 
 789 /*
 790  * Check the replication level of the vdev spec against the current pool.  Calls
 791  * get_replication() to make sure the new spec is self-consistent.  If the pool
 792  * has a consistent replication level, then we ignore any errors.  Otherwise,
 793  * report any difference between the two.
 794  */
 795 static int
 796 check_replication(nvlist_t *config, nvlist_t *newroot)
 797 {
 798         nvlist_t **child;
 799         uint_t  children;
 800         replication_level_t *current = NULL, *new;
 801         int ret;
 802 
 803         /*
 804          * If we have a current pool configuration, check to see if it's
 805          * self-consistent.  If not, simply return success.
 806          */
 807         if (config != NULL) {
 808                 nvlist_t *nvroot;
 809 
 810                 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
 811                     &nvroot) == 0);
 812                 if ((current = get_replication(nvroot, B_FALSE)) == NULL)
 813                         return (0);
 814         }
 815         /*
 816          * for spares there may be no children, and therefore no
 817          * replication level to check
 818          */
 819         if ((nvlist_lookup_nvlist_array(newroot, ZPOOL_CONFIG_CHILDREN,
 820             &child, &children) != 0) || (children == 0)) {
 821                 free(current);
 822                 return (0);
 823         }
 824 
 825         /*
 826          * If all we have is logs then there's no replication level to check.
 827          */
 828         if (num_logs(newroot) == children) {
 829                 free(current);
 830                 return (0);
 831         }
 832 
 833         /*
 834          * Get the replication level of the new vdev spec, reporting any
 835          * inconsistencies found.
 836          */
 837         if ((new = get_replication(newroot, B_TRUE)) == NULL) {
 838                 free(current);
 839                 return (-1);
 840         }
 841 
 842         /*
 843          * Check to see if the new vdev spec matches the replication level of
 844          * the current pool.
 845          */
 846         ret = 0;
 847         if (current != NULL) {
 848                 if (strcmp(current->zprl_type, new->zprl_type) != 0) {
 849                         vdev_error(gettext(
 850                             "mismatched replication level: pool uses %s "
 851                             "and new vdev is %s\n"),
 852                             current->zprl_type, new->zprl_type);
 853                         ret = -1;
 854                 } else if (current->zprl_parity != new->zprl_parity) {
 855                         vdev_error(gettext(
 856                             "mismatched replication level: pool uses %llu "
 857                             "device parity and new vdev uses %llu\n"),
 858                             current->zprl_parity, new->zprl_parity);
 859                         ret = -1;
 860                 } else if (current->zprl_children != new->zprl_children) {
 861                         vdev_error(gettext(
 862                             "mismatched replication level: pool uses %llu-way "
 863                             "%s and new vdev uses %llu-way %s\n"),
 864                             current->zprl_children, current->zprl_type,
 865                             new->zprl_children, new->zprl_type);
 866                         ret = -1;
 867                 }
 868         }
 869 
 870         free(new);
 871         if (current != NULL)
 872                 free(current);
 873 
 874         return (ret);
 875 }
 876 
 877 /*
 878  * Go through and find any whole disks in the vdev specification, labelling them
 879  * as appropriate.  When constructing the vdev spec, we were unable to open this
 880  * device in order to provide a devid.  Now that we have labelled the disk and
 881  * know the pool slice is valid, we can construct the devid now.
 882  *
 883  * If the disk was already labeled with an EFI label, we will have gotten the
 884  * devid already (because we were able to open the whole disk).  Otherwise, we
 885  * need to get the devid after we label the disk.
 886  */
 887 static int
 888 make_disks(zpool_handle_t *zhp, nvlist_t *nv, zpool_boot_label_t boot_type,
 889     uint64_t boot_size)
 890 {
 891         nvlist_t **child;
 892         uint_t c, children;
 893         char *type, *path, *diskname;
 894         char buf[MAXPATHLEN];
 895         uint64_t wholedisk;
 896         int fd;
 897         int ret;
 898         int slice;
 899         ddi_devid_t devid;
 900         char *minor = NULL, *devid_str = NULL;
 901 
 902         verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
 903 
 904         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
 905             &child, &children) != 0) {
 906 
 907                 if (strcmp(type, VDEV_TYPE_DISK) != 0)
 908                         return (0);
 909 
 910                 /*
 911                  * We have a disk device.  Get the path to the device
 912                  * and see if it's a whole disk by appending the backup
 913                  * slice and stat()ing the device.
 914                  */
 915                 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
 916 
 917                 diskname = strrchr(path, '/');
 918                 assert(diskname != NULL);
 919                 diskname++;
 920 
 921                 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
 922                     &wholedisk) != 0 || !wholedisk) {
 923                         /*
 924                          * This is not whole disk, return error if
 925                          * boot partition creation was requested
 926                          */
 927                         if (boot_type == ZPOOL_CREATE_BOOT_LABEL) {
 928                                 (void) fprintf(stderr,
 929                                     gettext("creating boot partition is only "
 930                                     "supported on whole disk vdevs: %s\n"),
 931                                     diskname);
 932                                 return (-1);
 933                         }
 934                         return (0);
 935                 }
 936 
 937                 ret = zpool_label_disk(g_zfs, zhp, diskname, boot_type,
 938                     boot_size, &slice);
 939                 if (ret == -1)
 940                         return (ret);
 941 
 942                 /*
 943                  * Fill in the devid, now that we've labeled the disk.
 944                  */
 945                 (void) snprintf(buf, sizeof (buf), "%ss%d", path, slice);
 946                 if ((fd = open(buf, O_RDONLY)) < 0) {
 947                         (void) fprintf(stderr,
 948                             gettext("cannot open '%s': %s\n"),
 949                             buf, strerror(errno));
 950                         return (-1);
 951                 }
 952 
 953                 if (devid_get(fd, &devid) == 0) {
 954                         if (devid_get_minor_name(fd, &minor) == 0 &&
 955                             (devid_str = devid_str_encode(devid, minor)) !=
 956                             NULL) {
 957                                 verify(nvlist_add_string(nv,
 958                                     ZPOOL_CONFIG_DEVID, devid_str) == 0);
 959                         }
 960                         if (devid_str != NULL)
 961                                 devid_str_free(devid_str);
 962                         if (minor != NULL)
 963                                 devid_str_free(minor);
 964                         devid_free(devid);
 965                 }
 966 
 967                 /*
 968                  * Update the path to refer to the pool slice.  The presence of
 969                  * the 'whole_disk' field indicates to the CLI that we should
 970                  * chop off the slice number when displaying the device in
 971                  * future output.
 972                  */
 973                 verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, buf) == 0);
 974 
 975                 (void) close(fd);
 976 
 977                 return (0);
 978         }
 979 
 980         /* illumos kernel does not support booting from multi-vdev pools. */
 981         if ((boot_type == ZPOOL_CREATE_BOOT_LABEL)) {
 982                 if ((strcmp(type, VDEV_TYPE_ROOT) == 0) && children > 1) {
 983                         (void) fprintf(stderr, gettext("boot pool "
 984                             "can not have more than one vdev\n"));
 985                         return (-1);
 986                 }
 987         }
 988 
 989         for (c = 0; c < children; c++) {
 990                 ret = make_disks(zhp, child[c], boot_type, boot_size);
 991                 if (ret != 0)
 992                         return (ret);
 993         }
 994 
 995         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
 996             &child, &children) == 0)
 997                 for (c = 0; c < children; c++) {
 998                         ret = make_disks(zhp, child[c], boot_type, boot_size);
 999                         if (ret != 0)
1000                                 return (ret);
1001                 }
1002 
1003         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
1004             &child, &children) == 0)
1005                 for (c = 0; c < children; c++) {
1006                         ret = make_disks(zhp, child[c], boot_type, boot_size);
1007                         if (ret != 0)
1008                                 return (ret);
1009                 }
1010 
1011         return (0);
1012 }
1013 
1014 /*
1015  * Determine if the given path is a hot spare within the given configuration.
1016  */
1017 static boolean_t
1018 is_spare(nvlist_t *config, const char *path)
1019 {
1020         int fd;
1021         pool_state_t state;
1022         char *name = NULL;
1023         nvlist_t *label;
1024         uint64_t guid, spareguid;
1025         nvlist_t *nvroot;
1026         nvlist_t **spares;
1027         uint_t i, nspares;
1028         boolean_t inuse;
1029 
1030         if ((fd = open(path, O_RDONLY)) < 0)
1031                 return (B_FALSE);
1032 
1033         if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 ||
1034             !inuse ||
1035             state != POOL_STATE_SPARE ||
1036             zpool_read_label(fd, &label) != 0) {
1037                 free(name);
1038                 (void) close(fd);
1039                 return (B_FALSE);
1040         }
1041         free(name);
1042         (void) close(fd);
1043 
1044         verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0);
1045         nvlist_free(label);
1046 
1047         verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
1048             &nvroot) == 0);
1049         if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
1050             &spares, &nspares) == 0) {
1051                 for (i = 0; i < nspares; i++) {
1052                         verify(nvlist_lookup_uint64(spares[i],
1053                             ZPOOL_CONFIG_GUID, &spareguid) == 0);
1054                         if (spareguid == guid)
1055                                 return (B_TRUE);
1056                 }
1057         }
1058 
1059         return (B_FALSE);
1060 }
1061 
1062 /*
1063  * Go through and find any devices that are in use.  We rely on libdiskmgt for
1064  * the majority of this task.
1065  */
1066 static boolean_t
1067 is_device_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force,
1068     boolean_t replacing, boolean_t isspare)
1069 {
1070         nvlist_t **child;
1071         uint_t c, children;
1072         char *type, *path;
1073         int ret = 0;
1074         char buf[MAXPATHLEN];
1075         uint64_t wholedisk;
1076         boolean_t anyinuse = B_FALSE;
1077 
1078         verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
1079 
1080         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1081             &child, &children) != 0) {
1082 
1083                 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
1084 
1085                 /*
1086                  * As a generic check, we look to see if this is a replace of a
1087                  * hot spare within the same pool.  If so, we allow it
1088                  * regardless of what libdiskmgt or zpool_in_use() says.
1089                  */
1090                 if (replacing) {
1091                         if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
1092                             &wholedisk) == 0 && wholedisk)
1093                                 (void) snprintf(buf, sizeof (buf), "%ss0",
1094                                     path);
1095                         else
1096                                 (void) strlcpy(buf, path, sizeof (buf));
1097 
1098                         if (is_spare(config, buf))
1099                                 return (B_FALSE);
1100                 }
1101 
1102                 if (strcmp(type, VDEV_TYPE_DISK) == 0)
1103                         ret = check_device(path, force, isspare);
1104                 else if (strcmp(type, VDEV_TYPE_FILE) == 0)
1105                         ret = check_file(path, force, isspare);
1106 
1107                 return (ret != 0);
1108         }
1109 
1110         for (c = 0; c < children; c++)
1111                 if (is_device_in_use(config, child[c], force, replacing,
1112                     B_FALSE))
1113                         anyinuse = B_TRUE;
1114 
1115         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
1116             &child, &children) == 0)
1117                 for (c = 0; c < children; c++)
1118                         if (is_device_in_use(config, child[c], force, replacing,
1119                             B_TRUE))
1120                                 anyinuse = B_TRUE;
1121 
1122         if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
1123             &child, &children) == 0)
1124                 for (c = 0; c < children; c++)
1125                         if (is_device_in_use(config, child[c], force, replacing,
1126                             B_FALSE))
1127                                 anyinuse = B_TRUE;
1128 
1129         return (anyinuse);
1130 }
1131 
1132 static const char *
1133 is_grouping(const char *type, int *mindev, int *maxdev)
1134 {
1135         if (strncmp(type, "raidz", 5) == 0) {
1136                 const char *p = type + 5;
1137                 char *end;
1138                 long nparity;
1139 
1140                 if (*p == '\0') {
1141                         nparity = 1;
1142                 } else if (*p == '0') {
1143                         return (NULL); /* no zero prefixes allowed */
1144                 } else {
1145                         errno = 0;
1146                         nparity = strtol(p, &end, 10);
1147                         if (errno != 0 || nparity < 1 || nparity >= 255 ||
1148                             *end != '\0')
1149                                 return (NULL);
1150                 }
1151 
1152                 if (mindev != NULL)
1153                         *mindev = nparity + 1;
1154                 if (maxdev != NULL)
1155                         *maxdev = 255;
1156                 return (VDEV_TYPE_RAIDZ);
1157         }
1158 
1159         if (maxdev != NULL)
1160                 *maxdev = INT_MAX;
1161 
1162         if (strcmp(type, "mirror") == 0) {
1163                 if (mindev != NULL)
1164                         *mindev = 2;
1165                 return (VDEV_TYPE_MIRROR);
1166         }
1167 
1168         if (strcmp(type, "spare") == 0) {
1169                 if (mindev != NULL)
1170                         *mindev = 1;
1171                 return (VDEV_TYPE_SPARE);
1172         }
1173 
1174         if (strcmp(type, "log") == 0) {
1175                 if (mindev != NULL)
1176                         *mindev = 1;
1177                 return (VDEV_TYPE_LOG);
1178         }
1179 
1180         if (strcmp(type, "cache") == 0) {
1181                 if (mindev != NULL)
1182                         *mindev = 1;
1183                 return (VDEV_TYPE_L2CACHE);
1184         }
1185 
1186         return (NULL);
1187 }
1188 
1189 /*
1190  * Construct a syntactically valid vdev specification,
1191  * and ensure that all devices and files exist and can be opened.
1192  * Note: we don't bother freeing anything in the error paths
1193  * because the program is just going to exit anyway.
1194  */
1195 nvlist_t *
1196 construct_spec(int argc, char **argv)
1197 {
1198         nvlist_t *nvroot, *nv, **top, **spares, **l2cache;
1199         int t, toplevels, mindev, maxdev, nspares, nlogs, nl2cache;
1200         const char *type;
1201         uint64_t is_log;
1202         boolean_t seen_logs;
1203 
1204         top = NULL;
1205         toplevels = 0;
1206         spares = NULL;
1207         l2cache = NULL;
1208         nspares = 0;
1209         nlogs = 0;
1210         nl2cache = 0;
1211         is_log = B_FALSE;
1212         seen_logs = B_FALSE;
1213 
1214         while (argc > 0) {
1215                 nv = NULL;
1216 
1217                 /*
1218                  * If it's a mirror or raidz, the subsequent arguments are
1219                  * its leaves -- until we encounter the next mirror or raidz.
1220                  */
1221                 if ((type = is_grouping(argv[0], &mindev, &maxdev)) != NULL) {
1222                         nvlist_t **child = NULL;
1223                         int c, children = 0;
1224 
1225                         if (strcmp(type, VDEV_TYPE_SPARE) == 0) {
1226                                 if (spares != NULL) {
1227                                         (void) fprintf(stderr,
1228                                             gettext("invalid vdev "
1229                                             "specification: 'spare' can be "
1230                                             "specified only once\n"));
1231                                         return (NULL);
1232                                 }
1233                                 is_log = B_FALSE;
1234                         }
1235 
1236                         if (strcmp(type, VDEV_TYPE_LOG) == 0) {
1237                                 if (seen_logs) {
1238                                         (void) fprintf(stderr,
1239                                             gettext("invalid vdev "
1240                                             "specification: 'log' can be "
1241                                             "specified only once\n"));
1242                                         return (NULL);
1243                                 }
1244                                 seen_logs = B_TRUE;
1245                                 is_log = B_TRUE;
1246                                 argc--;
1247                                 argv++;
1248                                 /*
1249                                  * A log is not a real grouping device.
1250                                  * We just set is_log and continue.
1251                                  */
1252                                 continue;
1253                         }
1254 
1255                         if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) {
1256                                 if (l2cache != NULL) {
1257                                         (void) fprintf(stderr,
1258                                             gettext("invalid vdev "
1259                                             "specification: 'cache' can be "
1260                                             "specified only once\n"));
1261                                         return (NULL);
1262                                 }
1263                                 is_log = B_FALSE;
1264                         }
1265 
1266                         if (is_log) {
1267                                 if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
1268                                         (void) fprintf(stderr,
1269                                             gettext("invalid vdev "
1270                                             "specification: unsupported 'log' "
1271                                             "device: %s\n"), type);
1272                                         return (NULL);
1273                                 }
1274                                 nlogs++;
1275                         }
1276 
1277                         for (c = 1; c < argc; c++) {
1278                                 if (is_grouping(argv[c], NULL, NULL) != NULL)
1279                                         break;
1280                                 children++;
1281                                 child = realloc(child,
1282                                     children * sizeof (nvlist_t *));
1283                                 if (child == NULL)
1284                                         zpool_no_memory();
1285                                 if ((nv = make_leaf_vdev(argv[c], B_FALSE))
1286                                     == NULL)
1287                                         return (NULL);
1288                                 child[children - 1] = nv;
1289                         }
1290 
1291                         if (children < mindev) {
1292                                 (void) fprintf(stderr, gettext("invalid vdev "
1293                                     "specification: %s requires at least %d "
1294                                     "devices\n"), argv[0], mindev);
1295                                 return (NULL);
1296                         }
1297 
1298                         if (children > maxdev) {
1299                                 (void) fprintf(stderr, gettext("invalid vdev "
1300                                     "specification: %s supports no more than "
1301                                     "%d devices\n"), argv[0], maxdev);
1302                                 return (NULL);
1303                         }
1304 
1305                         argc -= c;
1306                         argv += c;
1307 
1308                         if (strcmp(type, VDEV_TYPE_SPARE) == 0) {
1309                                 spares = child;
1310                                 nspares = children;
1311                                 continue;
1312                         } else if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) {
1313                                 l2cache = child;
1314                                 nl2cache = children;
1315                                 continue;
1316                         } else {
1317                                 verify(nvlist_alloc(&nv, NV_UNIQUE_NAME,
1318                                     0) == 0);
1319                                 verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
1320                                     type) == 0);
1321                                 verify(nvlist_add_uint64(nv,
1322                                     ZPOOL_CONFIG_IS_LOG, is_log) == 0);
1323                                 if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
1324                                         verify(nvlist_add_uint64(nv,
1325                                             ZPOOL_CONFIG_NPARITY,
1326                                             mindev - 1) == 0);
1327                                 }
1328                                 verify(nvlist_add_nvlist_array(nv,
1329                                     ZPOOL_CONFIG_CHILDREN, child,
1330                                     children) == 0);
1331 
1332                                 for (c = 0; c < children; c++)
1333                                         nvlist_free(child[c]);
1334                                 free(child);
1335                         }
1336                 } else {
1337                         /*
1338                          * We have a device.  Pass off to make_leaf_vdev() to
1339                          * construct the appropriate nvlist describing the vdev.
1340                          */
1341                         if ((nv = make_leaf_vdev(argv[0], is_log)) == NULL)
1342                                 return (NULL);
1343                         if (is_log)
1344                                 nlogs++;
1345                         argc--;
1346                         argv++;
1347                 }
1348 
1349                 toplevels++;
1350                 top = realloc(top, toplevels * sizeof (nvlist_t *));
1351                 if (top == NULL)
1352                         zpool_no_memory();
1353                 top[toplevels - 1] = nv;
1354         }
1355 
1356         if (toplevels == 0 && nspares == 0 && nl2cache == 0) {
1357                 (void) fprintf(stderr, gettext("invalid vdev "
1358                     "specification: at least one toplevel vdev must be "
1359                     "specified\n"));
1360                 return (NULL);
1361         }
1362 
1363         if (seen_logs && nlogs == 0) {
1364                 (void) fprintf(stderr, gettext("invalid vdev specification: "
1365                     "log requires at least 1 device\n"));
1366                 return (NULL);
1367         }
1368 
1369         /*
1370          * Finally, create nvroot and add all top-level vdevs to it.
1371          */
1372         verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0);
1373         verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
1374             VDEV_TYPE_ROOT) == 0);
1375         verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
1376             top, toplevels) == 0);
1377         if (nspares != 0)
1378                 verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
1379                     spares, nspares) == 0);
1380         if (nl2cache != 0)
1381                 verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
1382                     l2cache, nl2cache) == 0);
1383 
1384         for (t = 0; t < toplevels; t++)
1385                 nvlist_free(top[t]);
1386         for (t = 0; t < nspares; t++)
1387                 nvlist_free(spares[t]);
1388         for (t = 0; t < nl2cache; t++)
1389                 nvlist_free(l2cache[t]);
1390         if (spares)
1391                 free(spares);
1392         if (l2cache)
1393                 free(l2cache);
1394         free(top);
1395 
1396         return (nvroot);
1397 }
1398 
1399 nvlist_t *
1400 split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props,
1401     splitflags_t flags, int argc, char **argv)
1402 {
1403         nvlist_t *newroot = NULL, **child;
1404         uint_t c, children;
1405         zpool_boot_label_t boot_type;
1406 
1407         if (argc > 0) {
1408                 if ((newroot = construct_spec(argc, argv)) == NULL) {
1409                         (void) fprintf(stderr, gettext("Unable to build a "
1410                             "pool from the specified devices\n"));
1411                         return (NULL);
1412                 }
1413 
1414                 if (zpool_is_bootable(zhp))
1415                         boot_type = ZPOOL_COPY_BOOT_LABEL;
1416                 else
1417                         boot_type = ZPOOL_NO_BOOT_LABEL;
1418 
1419                 if (!flags.dryrun &&
1420                     make_disks(zhp, newroot, boot_type, 0) != 0) {
1421                         nvlist_free(newroot);
1422                         return (NULL);
1423                 }
1424 
1425                 /* avoid any tricks in the spec */
1426                 verify(nvlist_lookup_nvlist_array(newroot,
1427                     ZPOOL_CONFIG_CHILDREN, &child, &children) == 0);
1428                 for (c = 0; c < children; c++) {
1429                         char *path;
1430                         const char *type;
1431                         int min, max;
1432 
1433                         verify(nvlist_lookup_string(child[c],
1434                             ZPOOL_CONFIG_PATH, &path) == 0);
1435                         if ((type = is_grouping(path, &min, &max)) != NULL) {
1436                                 (void) fprintf(stderr, gettext("Cannot use "
1437                                     "'%s' as a device for splitting\n"), type);
1438                                 nvlist_free(newroot);
1439                                 return (NULL);
1440                         }
1441                 }
1442         }
1443 
1444         if (zpool_vdev_split(zhp, newname, &newroot, props, flags) != 0) {
1445                 nvlist_free(newroot);
1446                 return (NULL);
1447         }
1448 
1449         return (newroot);
1450 }
1451 
1452 /*
1453  * Get and validate the contents of the given vdev specification.  This ensures
1454  * that the nvlist returned is well-formed, that all the devices exist, and that
1455  * they are not currently in use by any other known consumer.  The 'poolconfig'
1456  * parameter is the current configuration of the pool when adding devices
1457  * existing pool, and is used to perform additional checks, such as changing the
1458  * replication level of the pool.  It can be 'NULL' to indicate that this is a
1459  * new pool.  The 'force' flag controls whether devices should be forcefully
1460  * added, even if they appear in use.
1461  */
1462 nvlist_t *
1463 make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
1464     boolean_t replacing, boolean_t dryrun, zpool_boot_label_t boot_type,
1465     uint64_t boot_size, int argc, char **argv)
1466 {
1467         nvlist_t *newroot;
1468         nvlist_t *poolconfig = NULL;
1469         is_force = force;
1470 
1471         /*
1472          * Construct the vdev specification.  If this is successful, we know
1473          * that we have a valid specification, and that all devices can be
1474          * opened.
1475          */
1476         if ((newroot = construct_spec(argc, argv)) == NULL)
1477                 return (NULL);
1478 
1479         if (zhp && ((poolconfig = zpool_get_config(zhp, NULL)) == NULL))
1480                 return (NULL);
1481 
1482         /*
1483          * Validate each device to make sure that its not shared with another
1484          * subsystem.  We do this even if 'force' is set, because there are some
1485          * uses (such as a dedicated dump device) that even '-f' cannot
1486          * override.
1487          */
1488         if (is_device_in_use(poolconfig, newroot, force, replacing, B_FALSE)) {
1489                 nvlist_free(newroot);
1490                 return (NULL);
1491         }
1492 
1493         /*
1494          * Check the replication level of the given vdevs and report any errors
1495          * found.  We include the existing pool spec, if any, as we need to
1496          * catch changes against the existing replication level.
1497          */
1498         if (check_rep && check_replication(poolconfig, newroot) != 0) {
1499                 nvlist_free(newroot);
1500                 return (NULL);
1501         }
1502 
1503         /*
1504          * Run through the vdev specification and label any whole disks found.
1505          */
1506         if (!dryrun && make_disks(zhp, newroot, boot_type, boot_size) != 0) {
1507                 nvlist_free(newroot);
1508                 return (NULL);
1509         }
1510 
1511         return (newroot);
1512 }