Print this page
NEX-3558 KRRP Integration
OS-195 itadm needs an easily parsable output mode
OS-207 SUP-817 causes lint warnings in zpool_main.c
Reviewed by: Alek Pinchuk <alek.pinchuk@nexena.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Albert Lee <albert.lee@nexenta.com>
re #12585 rb4049 ZFS++ work port - refactoring to improve separation of open/closed code, bug fixes, performance improvements - open code
Bug 11205: add missing libzfs_closed_stubs.c to fix opensource-only build.
ZFS plus work: special vdevs, cos, cos/vdev properties
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/cmd/zpool/zpool_vdev.c
+++ new/usr/src/cmd/zpool/zpool_vdev.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
|
↓ open down ↓ |
13 lines elided |
↑ open up ↑ |
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 + * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
24 25 * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
25 26 * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>.
26 27 */
27 28
28 29 /*
29 30 * Functions to convert between a list of vdevs and an nvlist representing the
30 31 * configuration. Each entry in the list can be one of:
31 32 *
32 33 * Device vdevs
33 34 * disk=(path=..., devid=...)
34 35 * file=(path=...)
35 36 *
36 37 * Group vdevs
37 38 * raidz[1|2]=(...)
38 39 * mirror=(...)
39 40 *
40 41 * Hot spares
41 42 *
42 43 * While the underlying implementation supports it, group vdevs cannot contain
43 44 * other group vdevs. All userland verification of devices is contained within
44 45 * this file. If successful, the nvlist returned can be passed directly to the
45 46 * kernel; we've done as much verification as possible in userland.
46 47 *
47 48 * Hot spares are a special case, and passed down as an array of disk vdevs, at
48 49 * the same level as the root of the vdev tree.
49 50 *
50 51 * The only function exported by this file is 'make_root_vdev'. The
51 52 * function performs several passes:
52 53 *
53 54 * 1. Construct the vdev specification. Performs syntax validation and
54 55 * makes sure each device is valid.
55 56 * 2. Check for devices in use. Using libdiskmgt, makes sure that no
56 57 * devices are also in use. Some can be overridden using the 'force'
57 58 * flag, others cannot.
58 59 * 3. Check for replication errors if the 'force' flag is not specified.
59 60 * This validates that the replication level is consistent across the
60 61 * entire pool.
61 62 * 4. Call libzfs to label any whole disks with an EFI label.
62 63 */
63 64
64 65 #include <assert.h>
65 66 #include <devid.h>
66 67 #include <errno.h>
67 68 #include <fcntl.h>
68 69 #include <libdiskmgt.h>
69 70 #include <libintl.h>
70 71 #include <libnvpair.h>
71 72 #include <limits.h>
72 73 #include <stdio.h>
73 74 #include <string.h>
74 75 #include <unistd.h>
75 76 #include <sys/efi_partition.h>
76 77 #include <sys/stat.h>
77 78 #include <sys/vtoc.h>
78 79 #include <sys/mntent.h>
79 80
80 81 #include "zpool_util.h"
81 82
82 83 #define BACKUP_SLICE "s2"
83 84
84 85 /*
85 86 * For any given vdev specification, we can have multiple errors. The
86 87 * vdev_error() function keeps track of whether we have seen an error yet, and
87 88 * prints out a header if its the first error we've seen.
88 89 */
89 90 boolean_t error_seen;
90 91 boolean_t is_force;
91 92
92 93 /*PRINTFLIKE1*/
93 94 static void
94 95 vdev_error(const char *fmt, ...)
95 96 {
96 97 va_list ap;
97 98
98 99 if (!error_seen) {
99 100 (void) fprintf(stderr, gettext("invalid vdev specification\n"));
100 101 if (!is_force)
101 102 (void) fprintf(stderr, gettext("use '-f' to override "
102 103 "the following errors:\n"));
103 104 else
104 105 (void) fprintf(stderr, gettext("the following errors "
105 106 "must be manually repaired:\n"));
106 107 error_seen = B_TRUE;
107 108 }
108 109
109 110 va_start(ap, fmt);
110 111 (void) vfprintf(stderr, fmt, ap);
111 112 va_end(ap);
112 113 }
113 114
/*
 * Print a warning for an error code handed back by libdiskmgt.
 *
 * ENXIO and ENODEV are expected when the device does not live in /dev/dsk,
 * so nothing is printed for those two codes.
 */
static void
libdiskmgt_error(int error)
{
	if (error != ENXIO && error != ENODEV) {
		(void) fprintf(stderr, gettext("warning: device in use "
		    "checking failed: %s\n"), strerror(error));
	}
}
127 128
128 129 /*
129 130 * Validate a device, passing the bulk of the work off to libdiskmgt.
130 131 */
131 132 static int
132 133 check_slice(const char *path, int force, boolean_t wholedisk, boolean_t isspare)
133 134 {
134 135 char *msg;
135 136 int error = 0;
136 137 dm_who_type_t who;
137 138
138 139 if (force)
139 140 who = DM_WHO_ZPOOL_FORCE;
140 141 else if (isspare)
141 142 who = DM_WHO_ZPOOL_SPARE;
142 143 else
143 144 who = DM_WHO_ZPOOL;
144 145
145 146 if (dm_inuse((char *)path, &msg, who, &error) || error) {
146 147 if (error != 0) {
147 148 libdiskmgt_error(error);
148 149 return (0);
149 150 } else {
150 151 vdev_error("%s", msg);
151 152 free(msg);
152 153 return (-1);
153 154 }
154 155 }
155 156
156 157 /*
157 158 * If we're given a whole disk, ignore overlapping slices since we're
158 159 * about to label it anyway.
159 160 */
160 161 error = 0;
161 162 if (!wholedisk && !force &&
162 163 (dm_isoverlapping((char *)path, &msg, &error) || error)) {
163 164 if (error == 0) {
164 165 /* dm_isoverlapping returned -1 */
165 166 vdev_error(gettext("%s overlaps with %s\n"), path, msg);
166 167 free(msg);
167 168 return (-1);
168 169 } else if (error != ENODEV) {
169 170 /* libdiskmgt's devcache only handles physical drives */
170 171 libdiskmgt_error(error);
171 172 return (0);
172 173 }
173 174 }
174 175
175 176 return (0);
176 177 }
177 178
178 179
179 180 /*
180 181 * Validate a whole disk. Iterate over all slices on the disk and make sure
181 182 * that none is in use by calling check_slice().
182 183 */
183 184 static int
184 185 check_disk(const char *name, dm_descriptor_t disk, int force, int isspare)
185 186 {
186 187 dm_descriptor_t *drive, *media, *slice;
187 188 int err = 0;
188 189 int i;
189 190 int ret;
190 191
191 192 /*
192 193 * Get the drive associated with this disk. This should never fail,
193 194 * because we already have an alias handle open for the device.
194 195 */
195 196 if ((drive = dm_get_associated_descriptors(disk, DM_DRIVE,
196 197 &err)) == NULL || *drive == NULL) {
197 198 if (err)
198 199 libdiskmgt_error(err);
199 200 return (0);
200 201 }
201 202
202 203 if ((media = dm_get_associated_descriptors(*drive, DM_MEDIA,
203 204 &err)) == NULL) {
204 205 dm_free_descriptors(drive);
205 206 if (err)
206 207 libdiskmgt_error(err);
207 208 return (0);
208 209 }
209 210
210 211 dm_free_descriptors(drive);
211 212
212 213 /*
213 214 * It is possible that the user has specified a removable media drive,
214 215 * and the media is not present.
215 216 */
216 217 if (*media == NULL) {
217 218 dm_free_descriptors(media);
218 219 vdev_error(gettext("'%s' has no media in drive\n"), name);
219 220 return (-1);
220 221 }
221 222
222 223 if ((slice = dm_get_associated_descriptors(*media, DM_SLICE,
223 224 &err)) == NULL) {
224 225 dm_free_descriptors(media);
225 226 if (err)
226 227 libdiskmgt_error(err);
227 228 return (0);
228 229 }
229 230
230 231 dm_free_descriptors(media);
231 232
232 233 ret = 0;
233 234
234 235 /*
235 236 * Iterate over all slices and report any errors. We don't care about
236 237 * overlapping slices because we are using the whole disk.
237 238 */
238 239 for (i = 0; slice[i] != NULL; i++) {
239 240 char *name = dm_get_name(slice[i], &err);
240 241
241 242 if (check_slice(name, force, B_TRUE, isspare) != 0)
242 243 ret = -1;
243 244
244 245 dm_free_name(name);
245 246 }
246 247
247 248 dm_free_descriptors(slice);
248 249 return (ret);
249 250 }
250 251
251 252 /*
252 253 * Validate a device.
253 254 */
254 255 static int
255 256 check_device(const char *path, boolean_t force, boolean_t isspare)
256 257 {
257 258 dm_descriptor_t desc;
258 259 int err;
259 260 char *dev;
260 261
261 262 /*
262 263 * For whole disks, libdiskmgt does not include the leading dev path.
263 264 */
264 265 dev = strrchr(path, '/');
265 266 assert(dev != NULL);
266 267 dev++;
267 268 if ((desc = dm_get_descriptor_by_name(DM_ALIAS, dev, &err)) != NULL) {
268 269 err = check_disk(path, desc, force, isspare);
269 270 dm_free_descriptor(desc);
270 271 return (err);
271 272 }
272 273
273 274 return (check_slice(path, force, B_FALSE, isspare));
274 275 }
275 276
276 277 /*
277 278 * Check that a file is valid. All we can do in this case is check that it's
278 279 * not in use by another pool, and not in use by swap.
279 280 */
280 281 static int
281 282 check_file(const char *file, boolean_t force, boolean_t isspare)
282 283 {
283 284 char *name;
284 285 int fd;
285 286 int ret = 0;
286 287 int err;
287 288 pool_state_t state;
288 289 boolean_t inuse;
289 290
290 291 if (dm_inuse_swap(file, &err)) {
291 292 if (err)
292 293 libdiskmgt_error(err);
293 294 else
294 295 vdev_error(gettext("%s is currently used by swap. "
295 296 "Please see swap(1M).\n"), file);
296 297 return (-1);
297 298 }
298 299
299 300 if ((fd = open(file, O_RDONLY)) < 0)
300 301 return (0);
301 302
302 303 if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) == 0 && inuse) {
303 304 const char *desc;
304 305
305 306 switch (state) {
306 307 case POOL_STATE_ACTIVE:
307 308 desc = gettext("active");
308 309 break;
309 310
310 311 case POOL_STATE_EXPORTED:
311 312 desc = gettext("exported");
312 313 break;
313 314
314 315 case POOL_STATE_POTENTIALLY_ACTIVE:
315 316 desc = gettext("potentially active");
316 317 break;
317 318
318 319 default:
319 320 desc = gettext("unknown");
320 321 break;
321 322 }
322 323
323 324 /*
324 325 * Allow hot spares to be shared between pools.
325 326 */
326 327 if (state == POOL_STATE_SPARE && isspare)
327 328 return (0);
328 329
329 330 if (state == POOL_STATE_ACTIVE ||
330 331 state == POOL_STATE_SPARE || !force) {
331 332 switch (state) {
332 333 case POOL_STATE_SPARE:
333 334 vdev_error(gettext("%s is reserved as a hot "
334 335 "spare for pool %s\n"), file, name);
335 336 break;
336 337 default:
337 338 vdev_error(gettext("%s is part of %s pool "
338 339 "'%s'\n"), file, desc, name);
339 340 break;
340 341 }
341 342 ret = -1;
342 343 }
343 344
344 345 free(name);
345 346 }
346 347
347 348 (void) close(fd);
348 349 return (ret);
349 350 }
350 351
351 352
352 353 /*
353 354 * By "whole disk" we mean an entire physical disk (something we can
354 355 * label, toggle the write cache on, etc.) as opposed to the full
355 356 * capacity of a pseudo-device such as lofi or did. We act as if we
356 357 * are labeling the disk, which should be a pretty good test of whether
357 358 * it's a viable device or not. Returns B_TRUE if it is and B_FALSE if
358 359 * it isn't.
359 360 */
360 361 static boolean_t
361 362 is_whole_disk(const char *arg)
362 363 {
363 364 struct dk_gpt *label;
364 365 int fd;
365 366 char path[MAXPATHLEN];
366 367
367 368 (void) snprintf(path, sizeof (path), "%s%s%s",
368 369 ZFS_RDISK_ROOT, strrchr(arg, '/'), BACKUP_SLICE);
369 370 if ((fd = open(path, O_RDWR | O_NDELAY)) < 0)
370 371 return (B_FALSE);
371 372 if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) != 0) {
372 373 (void) close(fd);
373 374 return (B_FALSE);
374 375 }
375 376 efi_free(label);
376 377 (void) close(fd);
377 378 return (B_TRUE);
378 379 }
379 380
|
↓ open down ↓ |
346 lines elided |
↑ open up ↑ |
380 381 /*
381 382 * Create a leaf vdev. Determine if this is a file or a device. If it's a
382 383 * device, fill in the device id to make a complete nvlist. Valid forms for a
383 384 * leaf vdev are:
384 385 *
385 386 * /dev/dsk/xxx Complete disk path
386 387 * /xxx Full path to file
387 388 * xxx Shorthand for /dev/dsk/xxx
388 389 */
389 390 static nvlist_t *
390 -make_leaf_vdev(const char *arg, uint64_t is_log)
391 +make_leaf_vdev(const char *arg, uint64_t is_log, uint64_t is_special)
391 392 {
392 393 char path[MAXPATHLEN];
393 394 struct stat64 statbuf;
394 395 nvlist_t *vdev = NULL;
395 396 char *type = NULL;
396 397 boolean_t wholedisk = B_FALSE;
397 398
398 399 /*
399 400 * Determine what type of vdev this is, and put the full path into
400 401 * 'path'. We detect whether this is a device of file afterwards by
401 402 * checking the st_mode of the file.
402 403 */
403 404 if (arg[0] == '/') {
404 405 /*
405 406 * Complete device or file path. Exact type is determined by
406 407 * examining the file descriptor afterwards.
407 408 */
408 409 wholedisk = is_whole_disk(arg);
409 410 if (!wholedisk && (stat64(arg, &statbuf) != 0)) {
410 411 (void) fprintf(stderr,
411 412 gettext("cannot open '%s': %s\n"),
412 413 arg, strerror(errno));
413 414 return (NULL);
414 415 }
415 416
416 417 (void) strlcpy(path, arg, sizeof (path));
417 418 } else {
418 419 /*
419 420 * This may be a short path for a device, or it could be total
420 421 * gibberish. Check to see if it's a known device in
421 422 * /dev/dsk/. As part of this check, see if we've been given a
422 423 * an entire disk (minus the slice number).
423 424 */
424 425 (void) snprintf(path, sizeof (path), "%s/%s", ZFS_DISK_ROOT,
425 426 arg);
426 427 wholedisk = is_whole_disk(path);
427 428 if (!wholedisk && (stat64(path, &statbuf) != 0)) {
428 429 /*
429 430 * If we got ENOENT, then the user gave us
430 431 * gibberish, so try to direct them with a
431 432 * reasonable error message. Otherwise,
432 433 * regurgitate strerror() since it's the best we
433 434 * can do.
434 435 */
435 436 if (errno == ENOENT) {
436 437 (void) fprintf(stderr,
437 438 gettext("cannot open '%s': no such "
438 439 "device in %s\n"), arg, ZFS_DISK_ROOT);
439 440 (void) fprintf(stderr,
440 441 gettext("must be a full path or "
441 442 "shorthand device name\n"));
442 443 return (NULL);
443 444 } else {
444 445 (void) fprintf(stderr,
445 446 gettext("cannot open '%s': %s\n"),
446 447 path, strerror(errno));
447 448 return (NULL);
448 449 }
449 450 }
450 451 }
451 452
452 453 /*
453 454 * Determine whether this is a device or a file.
454 455 */
455 456 if (wholedisk || S_ISBLK(statbuf.st_mode)) {
456 457 type = VDEV_TYPE_DISK;
457 458 } else if (S_ISREG(statbuf.st_mode)) {
458 459 type = VDEV_TYPE_FILE;
459 460 } else {
460 461 (void) fprintf(stderr, gettext("cannot use '%s': must be a "
461 462 "block device or regular file\n"), path);
462 463 return (NULL);
463 464 }
|
↓ open down ↓ |
63 lines elided |
↑ open up ↑ |
464 465
465 466 /*
466 467 * Finally, we have the complete device or file, and we know that it is
467 468 * acceptable to use. Construct the nvlist to describe this vdev. All
468 469 * vdevs have a 'path' element, and devices also have a 'devid' element.
469 470 */
470 471 verify(nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) == 0);
471 472 verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0);
472 473 verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0);
473 474 verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_LOG, is_log) == 0);
475 + verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_SPECIAL,
476 + is_special) == 0);
474 477 if (strcmp(type, VDEV_TYPE_DISK) == 0)
475 478 verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
476 479 (uint64_t)wholedisk) == 0);
477 480
478 481 /*
479 482 * For a whole disk, defer getting its devid until after labeling it.
480 483 */
481 484 if (S_ISBLK(statbuf.st_mode) && !wholedisk) {
482 485 /*
483 486 * Get the devid for the device.
484 487 */
485 488 int fd;
486 489 ddi_devid_t devid;
487 490 char *minor = NULL, *devid_str = NULL;
488 491
489 492 if ((fd = open(path, O_RDONLY)) < 0) {
490 493 (void) fprintf(stderr, gettext("cannot open '%s': "
491 494 "%s\n"), path, strerror(errno));
492 495 nvlist_free(vdev);
493 496 return (NULL);
494 497 }
495 498
496 499 if (devid_get(fd, &devid) == 0) {
497 500 if (devid_get_minor_name(fd, &minor) == 0 &&
498 501 (devid_str = devid_str_encode(devid, minor)) !=
499 502 NULL) {
500 503 verify(nvlist_add_string(vdev,
501 504 ZPOOL_CONFIG_DEVID, devid_str) == 0);
502 505 }
503 506 if (devid_str != NULL)
504 507 devid_str_free(devid_str);
505 508 if (minor != NULL)
506 509 devid_str_free(minor);
507 510 devid_free(devid);
508 511 }
509 512
510 513 (void) close(fd);
511 514 }
512 515
513 516 return (vdev);
514 517 }
515 518
516 519 /*
517 520 * Go through and verify the replication level of the pool is consistent.
518 521 * Performs the following checks:
519 522 *
520 523 * For the new spec, verifies that devices in mirrors and raidz are the
521 524 * same size.
522 525 *
523 526 * If the current configuration already has inconsistent replication
524 527 * levels, ignore any other potential problems in the new spec.
525 528 *
526 529 * Otherwise, make sure that the current spec (if there is one) and the new
527 530 * spec have consistent replication levels.
528 531 */
529 532 typedef struct replication_level {
530 533 char *zprl_type;
531 534 uint64_t zprl_children;
532 535 uint64_t zprl_parity;
533 536 } replication_level_t;
534 537
535 538 #define ZPOOL_FUZZ (16 * 1024 * 1024)
536 539
537 540 /*
538 541 * Given a list of toplevel vdevs, return the current replication level. If
539 542 * the config is inconsistent, then NULL is returned. If 'fatal' is set, then
540 543 * an error message will be displayed for each self-inconsistent vdev.
541 544 */
542 545 static replication_level_t *
543 546 get_replication(nvlist_t *nvroot, boolean_t fatal)
544 547 {
545 548 nvlist_t **top;
546 549 uint_t t, toplevels;
547 550 nvlist_t **child;
548 551 uint_t c, children;
549 552 nvlist_t *nv;
550 553 char *type;
551 554 replication_level_t lastrep = {0};
552 555 replication_level_t rep;
553 556 replication_level_t *ret;
554 557 boolean_t dontreport;
555 558
556 559 ret = safe_malloc(sizeof (replication_level_t));
557 560
558 561 verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
559 562 &top, &toplevels) == 0);
560 563
561 564 for (t = 0; t < toplevels; t++) {
562 565 uint64_t is_log = B_FALSE;
563 566
564 567 nv = top[t];
565 568
566 569 /*
567 570 * For separate logs we ignore the top level vdev replication
568 571 * constraints.
569 572 */
570 573 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log);
571 574 if (is_log)
572 575 continue;
573 576
574 577 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE,
575 578 &type) == 0);
576 579 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
577 580 &child, &children) != 0) {
578 581 /*
579 582 * This is a 'file' or 'disk' vdev.
580 583 */
581 584 rep.zprl_type = type;
582 585 rep.zprl_children = 1;
583 586 rep.zprl_parity = 0;
584 587 } else {
585 588 uint64_t vdev_size;
586 589
587 590 /*
588 591 * This is a mirror or RAID-Z vdev. Go through and make
589 592 * sure the contents are all the same (files vs. disks),
590 593 * keeping track of the number of elements in the
591 594 * process.
592 595 *
593 596 * We also check that the size of each vdev (if it can
594 597 * be determined) is the same.
595 598 */
596 599 rep.zprl_type = type;
597 600 rep.zprl_children = 0;
598 601
599 602 if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
600 603 verify(nvlist_lookup_uint64(nv,
601 604 ZPOOL_CONFIG_NPARITY,
602 605 &rep.zprl_parity) == 0);
603 606 assert(rep.zprl_parity != 0);
604 607 } else {
605 608 rep.zprl_parity = 0;
606 609 }
607 610
608 611 /*
609 612 * The 'dontreport' variable indicates that we've
610 613 * already reported an error for this spec, so don't
611 614 * bother doing it again.
612 615 */
613 616 type = NULL;
614 617 dontreport = 0;
615 618 vdev_size = -1ULL;
616 619 for (c = 0; c < children; c++) {
617 620 nvlist_t *cnv = child[c];
618 621 char *path;
619 622 struct stat64 statbuf;
620 623 uint64_t size = -1ULL;
|
↓ open down ↓ |
137 lines elided |
↑ open up ↑ |
621 624 char *childtype;
622 625 int fd, err;
623 626
624 627 rep.zprl_children++;
625 628
626 629 verify(nvlist_lookup_string(cnv,
627 630 ZPOOL_CONFIG_TYPE, &childtype) == 0);
628 631
629 632 /*
630 633 * If this is a replacing or spare vdev, then
631 - * get the real first child of the vdev: do this
632 - * in a loop because replacing and spare vdevs
633 - * can be nested.
634 + * get the real first child of the vdev.
634 635 */
635 - while (strcmp(childtype,
636 + if (strcmp(childtype,
636 637 VDEV_TYPE_REPLACING) == 0 ||
637 638 strcmp(childtype, VDEV_TYPE_SPARE) == 0) {
638 639 nvlist_t **rchild;
639 640 uint_t rchildren;
640 641
641 642 verify(nvlist_lookup_nvlist_array(cnv,
642 643 ZPOOL_CONFIG_CHILDREN, &rchild,
643 644 &rchildren) == 0);
644 645 assert(rchildren == 2);
645 646 cnv = rchild[0];
646 647
647 648 verify(nvlist_lookup_string(cnv,
648 649 ZPOOL_CONFIG_TYPE,
649 650 &childtype) == 0);
650 651 }
651 652
652 653 verify(nvlist_lookup_string(cnv,
653 654 ZPOOL_CONFIG_PATH, &path) == 0);
654 655
655 656 /*
656 657 * If we have a raidz/mirror that combines disks
657 658 * with files, report it as an error.
658 659 */
659 660 if (!dontreport && type != NULL &&
660 661 strcmp(type, childtype) != 0) {
661 662 if (ret != NULL)
662 663 free(ret);
663 664 ret = NULL;
664 665 if (fatal)
665 666 vdev_error(gettext(
666 667 "mismatched replication "
667 668 "level: %s contains both "
668 669 "files and devices\n"),
669 670 rep.zprl_type);
670 671 else
671 672 return (NULL);
672 673 dontreport = B_TRUE;
673 674 }
674 675
675 676 /*
676 677 * According to stat(2), the value of 'st_size'
677 678 * is undefined for block devices and character
678 679 * devices. But there is no effective way to
679 680 * determine the real size in userland.
680 681 *
681 682 * Instead, we'll take advantage of an
682 683 * implementation detail of spec_size(). If the
683 684 * device is currently open, then we (should)
684 685 * return a valid size.
685 686 *
686 687 * If we still don't get a valid size (indicated
687 688 * by a size of 0 or MAXOFFSET_T), then ignore
688 689 * this device altogether.
689 690 */
690 691 if ((fd = open(path, O_RDONLY)) >= 0) {
691 692 err = fstat64(fd, &statbuf);
692 693 (void) close(fd);
693 694 } else {
694 695 err = stat64(path, &statbuf);
695 696 }
696 697
697 698 if (err != 0 ||
698 699 statbuf.st_size == 0 ||
699 700 statbuf.st_size == MAXOFFSET_T)
700 701 continue;
701 702
702 703 size = statbuf.st_size;
703 704
704 705 /*
705 706 * Also make sure that devices and
706 707 * slices have a consistent size. If
707 708 * they differ by a significant amount
708 709 * (~16MB) then report an error.
709 710 */
710 711 if (!dontreport &&
711 712 (vdev_size != -1ULL &&
712 713 (labs(size - vdev_size) >
713 714 ZPOOL_FUZZ))) {
714 715 if (ret != NULL)
715 716 free(ret);
716 717 ret = NULL;
717 718 if (fatal)
718 719 vdev_error(gettext(
719 720 "%s contains devices of "
720 721 "different sizes\n"),
721 722 rep.zprl_type);
722 723 else
723 724 return (NULL);
724 725 dontreport = B_TRUE;
725 726 }
726 727
727 728 type = childtype;
728 729 vdev_size = size;
729 730 }
730 731 }
731 732
732 733 /*
733 734 * At this point, we have the replication of the last toplevel
734 735 * vdev in 'rep'. Compare it to 'lastrep' to see if its
735 736 * different.
736 737 */
737 738 if (lastrep.zprl_type != NULL) {
738 739 if (strcmp(lastrep.zprl_type, rep.zprl_type) != 0) {
739 740 if (ret != NULL)
740 741 free(ret);
741 742 ret = NULL;
742 743 if (fatal)
743 744 vdev_error(gettext(
744 745 "mismatched replication level: "
745 746 "both %s and %s vdevs are "
746 747 "present\n"),
747 748 lastrep.zprl_type, rep.zprl_type);
748 749 else
749 750 return (NULL);
750 751 } else if (lastrep.zprl_parity != rep.zprl_parity) {
751 752 if (ret)
752 753 free(ret);
753 754 ret = NULL;
754 755 if (fatal)
755 756 vdev_error(gettext(
756 757 "mismatched replication level: "
757 758 "both %llu and %llu device parity "
758 759 "%s vdevs are present\n"),
759 760 lastrep.zprl_parity,
760 761 rep.zprl_parity,
761 762 rep.zprl_type);
762 763 else
763 764 return (NULL);
764 765 } else if (lastrep.zprl_children != rep.zprl_children) {
765 766 if (ret)
766 767 free(ret);
767 768 ret = NULL;
768 769 if (fatal)
769 770 vdev_error(gettext(
770 771 "mismatched replication level: "
771 772 "both %llu-way and %llu-way %s "
772 773 "vdevs are present\n"),
773 774 lastrep.zprl_children,
774 775 rep.zprl_children,
775 776 rep.zprl_type);
776 777 else
777 778 return (NULL);
778 779 }
779 780 }
780 781 lastrep = rep;
781 782 }
782 783
783 784 if (ret != NULL)
784 785 *ret = rep;
785 786
786 787 return (ret);
787 788 }
788 789
789 790 /*
790 791 * Check the replication level of the vdev spec against the current pool. Calls
791 792 * get_replication() to make sure the new spec is self-consistent. If the pool
792 793 * has a consistent replication level, then we ignore any errors. Otherwise,
793 794 * report any difference between the two.
794 795 */
795 796 static int
796 797 check_replication(nvlist_t *config, nvlist_t *newroot)
797 798 {
798 799 nvlist_t **child;
799 800 uint_t children;
800 801 replication_level_t *current = NULL, *new;
801 802 int ret;
802 803
803 804 /*
804 805 * If we have a current pool configuration, check to see if it's
805 806 * self-consistent. If not, simply return success.
806 807 */
807 808 if (config != NULL) {
808 809 nvlist_t *nvroot;
809 810
810 811 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
811 812 &nvroot) == 0);
812 813 if ((current = get_replication(nvroot, B_FALSE)) == NULL)
813 814 return (0);
814 815 }
815 816 /*
816 817 * for spares there may be no children, and therefore no
817 818 * replication level to check
818 819 */
819 820 if ((nvlist_lookup_nvlist_array(newroot, ZPOOL_CONFIG_CHILDREN,
820 821 &child, &children) != 0) || (children == 0)) {
821 822 free(current);
822 823 return (0);
823 824 }
824 825
825 826 /*
826 827 * If all we have is logs then there's no replication level to check.
827 828 */
828 829 if (num_logs(newroot) == children) {
829 830 free(current);
830 831 return (0);
831 832 }
832 833
833 834 /*
834 835 * Get the replication level of the new vdev spec, reporting any
835 836 * inconsistencies found.
836 837 */
837 838 if ((new = get_replication(newroot, B_TRUE)) == NULL) {
838 839 free(current);
839 840 return (-1);
840 841 }
841 842
842 843 /*
843 844 * Check to see if the new vdev spec matches the replication level of
844 845 * the current pool.
845 846 */
846 847 ret = 0;
847 848 if (current != NULL) {
848 849 if (strcmp(current->zprl_type, new->zprl_type) != 0) {
849 850 vdev_error(gettext(
850 851 "mismatched replication level: pool uses %s "
851 852 "and new vdev is %s\n"),
852 853 current->zprl_type, new->zprl_type);
853 854 ret = -1;
854 855 } else if (current->zprl_parity != new->zprl_parity) {
855 856 vdev_error(gettext(
856 857 "mismatched replication level: pool uses %llu "
857 858 "device parity and new vdev uses %llu\n"),
858 859 current->zprl_parity, new->zprl_parity);
859 860 ret = -1;
860 861 } else if (current->zprl_children != new->zprl_children) {
861 862 vdev_error(gettext(
862 863 "mismatched replication level: pool uses %llu-way "
863 864 "%s and new vdev uses %llu-way %s\n"),
864 865 current->zprl_children, current->zprl_type,
865 866 new->zprl_children, new->zprl_type);
866 867 ret = -1;
867 868 }
868 869 }
869 870
870 871 free(new);
871 872 if (current != NULL)
872 873 free(current);
873 874
874 875 return (ret);
875 876 }
876 877
877 878 /*
878 879 * Go through and find any whole disks in the vdev specification, labelling them
879 880 * as appropriate. When constructing the vdev spec, we were unable to open this
880 881 * device in order to provide a devid. Now that we have labelled the disk and
881 882 * know the pool slice is valid, we can construct the devid now.
882 883 *
883 884 * If the disk was already labeled with an EFI label, we will have gotten the
884 885 * devid already (because we were able to open the whole disk). Otherwise, we
885 886 * need to get the devid after we label the disk.
886 887 */
887 888 static int
888 889 make_disks(zpool_handle_t *zhp, nvlist_t *nv, zpool_boot_label_t boot_type,
889 890 uint64_t boot_size)
890 891 {
891 892 nvlist_t **child;
892 893 uint_t c, children;
893 894 char *type, *path, *diskname;
894 895 char buf[MAXPATHLEN];
895 896 uint64_t wholedisk;
896 897 int fd;
897 898 int ret;
898 899 int slice;
899 900 ddi_devid_t devid;
900 901 char *minor = NULL, *devid_str = NULL;
901 902
902 903 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
903 904
904 905 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
905 906 &child, &children) != 0) {
906 907
907 908 if (strcmp(type, VDEV_TYPE_DISK) != 0)
908 909 return (0);
909 910
910 911 /*
911 912 * We have a disk device. Get the path to the device
912 913 * and see if it's a whole disk by appending the backup
913 914 * slice and stat()ing the device.
914 915 */
915 916 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
916 917
917 918 diskname = strrchr(path, '/');
918 919 assert(diskname != NULL);
919 920 diskname++;
920 921
921 922 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
922 923 &wholedisk) != 0 || !wholedisk) {
923 924 /*
924 925 * This is not whole disk, return error if
925 926 * boot partition creation was requested
926 927 */
927 928 if (boot_type == ZPOOL_CREATE_BOOT_LABEL) {
928 929 (void) fprintf(stderr,
929 930 gettext("creating boot partition is only "
930 931 "supported on whole disk vdevs: %s\n"),
931 932 diskname);
932 933 return (-1);
933 934 }
934 935 return (0);
935 936 }
936 937
937 938 ret = zpool_label_disk(g_zfs, zhp, diskname, boot_type,
938 939 boot_size, &slice);
939 940 if (ret == -1)
940 941 return (ret);
941 942
942 943 /*
943 944 * Fill in the devid, now that we've labeled the disk.
944 945 */
945 946 (void) snprintf(buf, sizeof (buf), "%ss%d", path, slice);
946 947 if ((fd = open(buf, O_RDONLY)) < 0) {
947 948 (void) fprintf(stderr,
948 949 gettext("cannot open '%s': %s\n"),
949 950 buf, strerror(errno));
950 951 return (-1);
951 952 }
952 953
953 954 if (devid_get(fd, &devid) == 0) {
954 955 if (devid_get_minor_name(fd, &minor) == 0 &&
955 956 (devid_str = devid_str_encode(devid, minor)) !=
956 957 NULL) {
957 958 verify(nvlist_add_string(nv,
958 959 ZPOOL_CONFIG_DEVID, devid_str) == 0);
959 960 }
960 961 if (devid_str != NULL)
961 962 devid_str_free(devid_str);
962 963 if (minor != NULL)
963 964 devid_str_free(minor);
964 965 devid_free(devid);
965 966 }
966 967
967 968 /*
968 969 * Update the path to refer to the pool slice. The presence of
969 970 * the 'whole_disk' field indicates to the CLI that we should
970 971 * chop off the slice number when displaying the device in
971 972 * future output.
972 973 */
973 974 verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, buf) == 0);
974 975
975 976 (void) close(fd);
976 977
977 978 return (0);
978 979 }
979 980
980 981 /* illumos kernel does not support booting from multi-vdev pools. */
981 982 if ((boot_type == ZPOOL_CREATE_BOOT_LABEL)) {
982 983 if ((strcmp(type, VDEV_TYPE_ROOT) == 0) && children > 1) {
983 984 (void) fprintf(stderr, gettext("boot pool "
984 985 "can not have more than one vdev\n"));
985 986 return (-1);
986 987 }
987 988 }
988 989
989 990 for (c = 0; c < children; c++) {
990 991 ret = make_disks(zhp, child[c], boot_type, boot_size);
991 992 if (ret != 0)
992 993 return (ret);
993 994 }
994 995
995 996 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
996 997 &child, &children) == 0)
997 998 for (c = 0; c < children; c++) {
998 999 ret = make_disks(zhp, child[c], boot_type, boot_size);
999 1000 if (ret != 0)
1000 1001 return (ret);
1001 1002 }
1002 1003
1003 1004 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
1004 1005 &child, &children) == 0)
1005 1006 for (c = 0; c < children; c++) {
1006 1007 ret = make_disks(zhp, child[c], boot_type, boot_size);
1007 1008 if (ret != 0)
1008 1009 return (ret);
1009 1010 }
1010 1011
1011 1012 return (0);
1012 1013 }
1013 1014
1014 1015 /*
1015 1016 * Determine if the given path is a hot spare within the given configuration.
1016 1017 */
1017 1018 static boolean_t
1018 1019 is_spare(nvlist_t *config, const char *path)
1019 1020 {
1020 1021 int fd;
1021 1022 pool_state_t state;
1022 1023 char *name = NULL;
1023 1024 nvlist_t *label;
1024 1025 uint64_t guid, spareguid;
1025 1026 nvlist_t *nvroot;
1026 1027 nvlist_t **spares;
1027 1028 uint_t i, nspares;
1028 1029 boolean_t inuse;
1029 1030
1030 1031 if ((fd = open(path, O_RDONLY)) < 0)
1031 1032 return (B_FALSE);
1032 1033
1033 1034 if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 ||
1034 1035 !inuse ||
1035 1036 state != POOL_STATE_SPARE ||
1036 1037 zpool_read_label(fd, &label) != 0) {
1037 1038 free(name);
1038 1039 (void) close(fd);
1039 1040 return (B_FALSE);
1040 1041 }
1041 1042 free(name);
1042 1043 (void) close(fd);
1043 1044
1044 1045 verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0);
1045 1046 nvlist_free(label);
1046 1047
1047 1048 verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
1048 1049 &nvroot) == 0);
1049 1050 if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
1050 1051 &spares, &nspares) == 0) {
1051 1052 for (i = 0; i < nspares; i++) {
1052 1053 verify(nvlist_lookup_uint64(spares[i],
1053 1054 ZPOOL_CONFIG_GUID, &spareguid) == 0);
1054 1055 if (spareguid == guid)
1055 1056 return (B_TRUE);
1056 1057 }
1057 1058 }
1058 1059
1059 1060 return (B_FALSE);
1060 1061 }
1061 1062
1062 1063 /*
1063 1064 * Go through and find any devices that are in use. We rely on libdiskmgt for
1064 1065 * the majority of this task.
1065 1066 */
1066 1067 static boolean_t
1067 1068 is_device_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force,
1068 1069 boolean_t replacing, boolean_t isspare)
1069 1070 {
1070 1071 nvlist_t **child;
1071 1072 uint_t c, children;
1072 1073 char *type, *path;
1073 1074 int ret = 0;
1074 1075 char buf[MAXPATHLEN];
1075 1076 uint64_t wholedisk;
1076 1077 boolean_t anyinuse = B_FALSE;
1077 1078
1078 1079 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
1079 1080
1080 1081 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1081 1082 &child, &children) != 0) {
1082 1083
1083 1084 verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
1084 1085
1085 1086 /*
1086 1087 * As a generic check, we look to see if this is a replace of a
1087 1088 * hot spare within the same pool. If so, we allow it
1088 1089 * regardless of what libdiskmgt or zpool_in_use() says.
1089 1090 */
1090 1091 if (replacing) {
1091 1092 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
1092 1093 &wholedisk) == 0 && wholedisk)
1093 1094 (void) snprintf(buf, sizeof (buf), "%ss0",
1094 1095 path);
1095 1096 else
1096 1097 (void) strlcpy(buf, path, sizeof (buf));
1097 1098
1098 1099 if (is_spare(config, buf))
1099 1100 return (B_FALSE);
1100 1101 }
1101 1102
1102 1103 if (strcmp(type, VDEV_TYPE_DISK) == 0)
1103 1104 ret = check_device(path, force, isspare);
1104 1105 else if (strcmp(type, VDEV_TYPE_FILE) == 0)
1105 1106 ret = check_file(path, force, isspare);
1106 1107
1107 1108 return (ret != 0);
1108 1109 }
1109 1110
1110 1111 for (c = 0; c < children; c++)
1111 1112 if (is_device_in_use(config, child[c], force, replacing,
1112 1113 B_FALSE))
1113 1114 anyinuse = B_TRUE;
1114 1115
1115 1116 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
1116 1117 &child, &children) == 0)
1117 1118 for (c = 0; c < children; c++)
1118 1119 if (is_device_in_use(config, child[c], force, replacing,
1119 1120 B_TRUE))
1120 1121 anyinuse = B_TRUE;
1121 1122
1122 1123 if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
1123 1124 &child, &children) == 0)
1124 1125 for (c = 0; c < children; c++)
1125 1126 if (is_device_in_use(config, child[c], force, replacing,
1126 1127 B_FALSE))
1127 1128 anyinuse = B_TRUE;
1128 1129
1129 1130 return (anyinuse);
1130 1131 }
1131 1132
1132 1133 static const char *
1133 1134 is_grouping(const char *type, int *mindev, int *maxdev)
1134 1135 {
1135 1136 if (strncmp(type, "raidz", 5) == 0) {
1136 1137 const char *p = type + 5;
1137 1138 char *end;
1138 1139 long nparity;
1139 1140
1140 1141 if (*p == '\0') {
1141 1142 nparity = 1;
1142 1143 } else if (*p == '0') {
1143 1144 return (NULL); /* no zero prefixes allowed */
1144 1145 } else {
1145 1146 errno = 0;
1146 1147 nparity = strtol(p, &end, 10);
1147 1148 if (errno != 0 || nparity < 1 || nparity >= 255 ||
1148 1149 *end != '\0')
1149 1150 return (NULL);
1150 1151 }
1151 1152
1152 1153 if (mindev != NULL)
1153 1154 *mindev = nparity + 1;
1154 1155 if (maxdev != NULL)
1155 1156 *maxdev = 255;
1156 1157 return (VDEV_TYPE_RAIDZ);
1157 1158 }
1158 1159
1159 1160 if (maxdev != NULL)
1160 1161 *maxdev = INT_MAX;
1161 1162
1162 1163 if (strcmp(type, "mirror") == 0) {
1163 1164 if (mindev != NULL)
1164 1165 *mindev = 2;
1165 1166 return (VDEV_TYPE_MIRROR);
1166 1167 }
1167 1168
1168 1169 if (strcmp(type, "spare") == 0) {
1169 1170 if (mindev != NULL)
1170 1171 *mindev = 1;
1171 1172 return (VDEV_TYPE_SPARE);
1172 1173 }
1173 1174
1174 1175 if (strcmp(type, "log") == 0) {
1175 1176 if (mindev != NULL)
|
↓ open down ↓ |
530 lines elided |
↑ open up ↑ |
1176 1177 *mindev = 1;
1177 1178 return (VDEV_TYPE_LOG);
1178 1179 }
1179 1180
1180 1181 if (strcmp(type, "cache") == 0) {
1181 1182 if (mindev != NULL)
1182 1183 *mindev = 1;
1183 1184 return (VDEV_TYPE_L2CACHE);
1184 1185 }
1185 1186
1187 + if (strcmp(type, "special") == 0) {
1188 + if (mindev != NULL)
1189 + *mindev = 1;
1190 + return (VDEV_TYPE_SPECIAL);
1191 + }
1192 +
1186 1193 return (NULL);
1187 1194 }
1188 1195
1189 1196 /*
1190 1197 * Construct a syntactically valid vdev specification,
1191 1198 * and ensure that all devices and files exist and can be opened.
1192 1199 * Note: we don't bother freeing anything in the error paths
1193 1200 * because the program is just going to exit anyway.
1194 1201 */
1195 1202 nvlist_t *
1196 1203 construct_spec(int argc, char **argv)
1197 1204 {
1198 1205 nvlist_t *nvroot, *nv, **top, **spares, **l2cache;
1199 1206 int t, toplevels, mindev, maxdev, nspares, nlogs, nl2cache;
1207 + int nspecial = 0;
1200 1208 const char *type;
1201 - uint64_t is_log;
1202 - boolean_t seen_logs;
1209 + boolean_t is_log, seen_logs;
1210 + boolean_t is_special, seen_special;
1203 1211
1204 1212 top = NULL;
1205 1213 toplevels = 0;
1206 1214 spares = NULL;
1207 1215 l2cache = NULL;
1208 1216 nspares = 0;
1209 1217 nlogs = 0;
1210 1218 nl2cache = 0;
1211 1219 is_log = B_FALSE;
1212 1220 seen_logs = B_FALSE;
1221 + is_special = B_FALSE;
1222 + seen_special = B_FALSE;
1213 1223
1214 1224 while (argc > 0) {
1215 1225 nv = NULL;
1216 1226
1217 1227 /*
1218 1228 * If it's a mirror or raidz, the subsequent arguments are
1219 1229 * its leaves -- until we encounter the next mirror or raidz.
1220 1230 */
1221 1231 if ((type = is_grouping(argv[0], &mindev, &maxdev)) != NULL) {
1222 1232 nvlist_t **child = NULL;
1223 1233 int c, children = 0;
|
↓ open down ↓ |
1 lines elided |
↑ open up ↑ |
1224 1234
1225 1235 if (strcmp(type, VDEV_TYPE_SPARE) == 0) {
1226 1236 if (spares != NULL) {
1227 1237 (void) fprintf(stderr,
1228 1238 gettext("invalid vdev "
1229 1239 "specification: 'spare' can be "
1230 1240 "specified only once\n"));
1231 1241 return (NULL);
1232 1242 }
1233 1243 is_log = B_FALSE;
1244 + is_special = B_FALSE;
1234 1245 }
1235 1246
1236 1247 if (strcmp(type, VDEV_TYPE_LOG) == 0) {
1237 1248 if (seen_logs) {
1238 1249 (void) fprintf(stderr,
1239 1250 gettext("invalid vdev "
1240 1251 "specification: 'log' can be "
1241 1252 "specified only once\n"));
1242 1253 return (NULL);
1243 1254 }
1244 1255 seen_logs = B_TRUE;
1245 1256 is_log = B_TRUE;
1257 + is_special = B_FALSE;
1246 1258 argc--;
1247 1259 argv++;
1248 1260 /*
1249 1261 * A log is not a real grouping device.
1250 1262 * We just set is_log and continue.
1251 1263 */
1252 1264 continue;
1253 1265 }
1254 1266
1255 1267 if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) {
1256 1268 if (l2cache != NULL) {
1257 1269 (void) fprintf(stderr,
1258 1270 gettext("invalid vdev "
1259 1271 "specification: 'cache' can be "
1260 1272 "specified only once\n"));
1261 1273 return (NULL);
1262 1274 }
1263 1275 is_log = B_FALSE;
1276 + is_special = B_FALSE;
1264 1277 }
1265 1278
1279 + if (strcmp(type, VDEV_TYPE_SPECIAL) == 0) {
1280 + if (seen_special) {
1281 + (void) fprintf(stderr,
1282 + gettext("invalid vdev "
1283 + "specification: 'special' can be "
1284 + "specified only once\n"));
1285 + return (NULL);
1286 + }
1287 + seen_special = B_TRUE;
1288 + is_log = B_FALSE;
1289 + is_special = B_TRUE;
1290 + argc--;
1291 + argv++;
1292 + /*
1293 + * A special is not a real grouping device.
1294 + * We just set is_special and continue.
1295 + */
1296 + continue;
1297 + }
1298 +
1266 1299 if (is_log) {
1267 1300 if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
1268 1301 (void) fprintf(stderr,
1269 1302 gettext("invalid vdev "
1270 1303 "specification: unsupported 'log' "
1271 1304 "device: %s\n"), type);
1272 1305 return (NULL);
1273 1306 }
1274 1307 nlogs++;
1275 1308 }
1276 1309
1310 + if (is_special) {
1311 + if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
1312 + (void) fprintf(stderr,
1313 + gettext("invalid vdev "
1314 + "specification: unsupported "
1315 + "'special' device: %s\n"), type);
1316 + return (NULL);
1317 + }
1318 + nspecial++;
1319 + }
1320 +
1277 1321 for (c = 1; c < argc; c++) {
1278 1322 if (is_grouping(argv[c], NULL, NULL) != NULL)
1279 1323 break;
1280 1324 children++;
1281 1325 child = realloc(child,
1282 1326 children * sizeof (nvlist_t *));
1283 1327 if (child == NULL)
1284 1328 zpool_no_memory();
1285 - if ((nv = make_leaf_vdev(argv[c], B_FALSE))
1286 - == NULL)
1329 + if ((nv = make_leaf_vdev(argv[c],
1330 + (uint64_t)B_FALSE,
1331 + (uint64_t)B_FALSE)) == NULL)
1287 1332 return (NULL);
1288 1333 child[children - 1] = nv;
1289 1334 }
1290 1335
1291 1336 if (children < mindev) {
1292 1337 (void) fprintf(stderr, gettext("invalid vdev "
1293 1338 "specification: %s requires at least %d "
1294 1339 "devices\n"), argv[0], mindev);
1295 1340 return (NULL);
1296 1341 }
1297 1342
1298 1343 if (children > maxdev) {
1299 1344 (void) fprintf(stderr, gettext("invalid vdev "
1300 1345 "specification: %s supports no more than "
1301 1346 "%d devices\n"), argv[0], maxdev);
1302 1347 return (NULL);
1303 1348 }
1304 1349
1305 1350 argc -= c;
1306 1351 argv += c;
1307 1352
1308 1353 if (strcmp(type, VDEV_TYPE_SPARE) == 0) {
1309 1354 spares = child;
1310 1355 nspares = children;
1311 1356 continue;
|
↓ open down ↓ |
15 lines elided |
↑ open up ↑ |
1312 1357 } else if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) {
1313 1358 l2cache = child;
1314 1359 nl2cache = children;
1315 1360 continue;
1316 1361 } else {
1317 1362 verify(nvlist_alloc(&nv, NV_UNIQUE_NAME,
1318 1363 0) == 0);
1319 1364 verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
1320 1365 type) == 0);
1321 1366 verify(nvlist_add_uint64(nv,
1322 - ZPOOL_CONFIG_IS_LOG, is_log) == 0);
1367 + ZPOOL_CONFIG_IS_LOG,
1368 + (uint64_t)is_log) == 0);
1369 + verify(nvlist_add_uint64(nv,
1370 + ZPOOL_CONFIG_IS_SPECIAL,
1371 + (uint64_t)is_special) == 0);
1323 1372 if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
1324 1373 verify(nvlist_add_uint64(nv,
1325 1374 ZPOOL_CONFIG_NPARITY,
1326 1375 mindev - 1) == 0);
1327 1376 }
1328 1377 verify(nvlist_add_nvlist_array(nv,
1329 1378 ZPOOL_CONFIG_CHILDREN, child,
1330 1379 children) == 0);
1331 1380
1332 1381 for (c = 0; c < children; c++)
1333 1382 nvlist_free(child[c]);
1334 1383 free(child);
1335 1384 }
1336 1385 } else {
1337 1386 /*
1338 1387 * We have a device. Pass off to make_leaf_vdev() to
1339 1388 * construct the appropriate nvlist describing the vdev.
1340 1389 */
1341 - if ((nv = make_leaf_vdev(argv[0], is_log)) == NULL)
1390 + if ((nv = make_leaf_vdev(argv[0], (uint64_t)is_log,
1391 + (uint64_t)is_special)) == NULL)
1342 1392 return (NULL);
1343 1393 if (is_log)
1344 1394 nlogs++;
1395 + if (is_special)
1396 + nspecial++;
1345 1397 argc--;
1346 1398 argv++;
1347 1399 }
1348 1400
1349 1401 toplevels++;
1350 1402 top = realloc(top, toplevels * sizeof (nvlist_t *));
1351 1403 if (top == NULL)
1352 1404 zpool_no_memory();
1353 1405 top[toplevels - 1] = nv;
1354 1406 }
1355 1407
1356 1408 if (toplevels == 0 && nspares == 0 && nl2cache == 0) {
1357 1409 (void) fprintf(stderr, gettext("invalid vdev "
1358 1410 "specification: at least one toplevel vdev must be "
1359 1411 "specified\n"));
1360 1412 return (NULL);
1361 1413 }
1414 +
1415 + if (seen_special && nspecial == 0) {
1416 + (void) fprintf(stderr, gettext("invalid vdev specification: "
1417 + "special requires at least 1 device\n"));
1418 + return (NULL);
1419 + }
1362 1420
1363 1421 if (seen_logs && nlogs == 0) {
1364 1422 (void) fprintf(stderr, gettext("invalid vdev specification: "
1365 1423 "log requires at least 1 device\n"));
1366 1424 return (NULL);
1367 1425 }
1368 1426
1369 1427 /*
1370 1428 * Finally, create nvroot and add all top-level vdevs to it.
1371 1429 */
1372 1430 verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0);
1373 1431 verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
1374 1432 VDEV_TYPE_ROOT) == 0);
1375 1433 verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
1376 1434 top, toplevels) == 0);
1377 1435 if (nspares != 0)
1378 1436 verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
1379 1437 spares, nspares) == 0);
1380 1438 if (nl2cache != 0)
1381 1439 verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
1382 1440 l2cache, nl2cache) == 0);
1383 1441
1384 1442 for (t = 0; t < toplevels; t++)
1385 1443 nvlist_free(top[t]);
1386 1444 for (t = 0; t < nspares; t++)
1387 1445 nvlist_free(spares[t]);
1388 1446 for (t = 0; t < nl2cache; t++)
1389 1447 nvlist_free(l2cache[t]);
1390 1448 if (spares)
1391 1449 free(spares);
1392 1450 if (l2cache)
1393 1451 free(l2cache);
1394 1452 free(top);
1395 1453
1396 1454 return (nvroot);
1397 1455 }
1398 1456
1399 1457 nvlist_t *
1400 1458 split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props,
1401 1459 splitflags_t flags, int argc, char **argv)
1402 1460 {
1403 1461 nvlist_t *newroot = NULL, **child;
1404 1462 uint_t c, children;
1405 1463 zpool_boot_label_t boot_type;
1406 1464
1407 1465 if (argc > 0) {
1408 1466 if ((newroot = construct_spec(argc, argv)) == NULL) {
1409 1467 (void) fprintf(stderr, gettext("Unable to build a "
1410 1468 "pool from the specified devices\n"));
1411 1469 return (NULL);
1412 1470 }
1413 1471
1414 1472 if (zpool_is_bootable(zhp))
1415 1473 boot_type = ZPOOL_COPY_BOOT_LABEL;
1416 1474 else
1417 1475 boot_type = ZPOOL_NO_BOOT_LABEL;
1418 1476
1419 1477 if (!flags.dryrun &&
1420 1478 make_disks(zhp, newroot, boot_type, 0) != 0) {
1421 1479 nvlist_free(newroot);
1422 1480 return (NULL);
1423 1481 }
1424 1482
1425 1483 /* avoid any tricks in the spec */
1426 1484 verify(nvlist_lookup_nvlist_array(newroot,
1427 1485 ZPOOL_CONFIG_CHILDREN, &child, &children) == 0);
1428 1486 for (c = 0; c < children; c++) {
1429 1487 char *path;
1430 1488 const char *type;
1431 1489 int min, max;
1432 1490
1433 1491 verify(nvlist_lookup_string(child[c],
1434 1492 ZPOOL_CONFIG_PATH, &path) == 0);
1435 1493 if ((type = is_grouping(path, &min, &max)) != NULL) {
1436 1494 (void) fprintf(stderr, gettext("Cannot use "
1437 1495 "'%s' as a device for splitting\n"), type);
1438 1496 nvlist_free(newroot);
1439 1497 return (NULL);
1440 1498 }
1441 1499 }
1442 1500 }
1443 1501
1444 1502 if (zpool_vdev_split(zhp, newname, &newroot, props, flags) != 0) {
1445 1503 nvlist_free(newroot);
1446 1504 return (NULL);
1447 1505 }
1448 1506
1449 1507 return (newroot);
1450 1508 }
1451 1509
1452 1510 /*
1453 1511 * Get and validate the contents of the given vdev specification. This ensures
1454 1512 * that the nvlist returned is well-formed, that all the devices exist, and that
1455 1513 * they are not currently in use by any other known consumer. The 'poolconfig'
1456 1514 * parameter is the current configuration of the pool when adding devices
1457 1515 * to an existing pool, and is used to perform additional checks, such as changing the
1458 1516 * replication level of the pool. It can be 'NULL' to indicate that this is a
1459 1517 * new pool. The 'force' flag controls whether devices should be forcefully
1460 1518 * added, even if they appear in use.
1461 1519 */
1462 1520 nvlist_t *
1463 1521 make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
1464 1522 boolean_t replacing, boolean_t dryrun, zpool_boot_label_t boot_type,
1465 1523 uint64_t boot_size, int argc, char **argv)
1466 1524 {
1467 1525 nvlist_t *newroot;
1468 1526 nvlist_t *poolconfig = NULL;
1469 1527 is_force = force;
1470 1528
1471 1529 /*
1472 1530 * Construct the vdev specification. If this is successful, we know
1473 1531 * that we have a valid specification, and that all devices can be
1474 1532 * opened.
1475 1533 */
1476 1534 if ((newroot = construct_spec(argc, argv)) == NULL)
1477 1535 return (NULL);
1478 1536
1479 1537 if (zhp && ((poolconfig = zpool_get_config(zhp, NULL)) == NULL))
1480 1538 return (NULL);
1481 1539
1482 1540 /*
1483 1541 * Validate each device to make sure that it's not shared with another
1484 1542 * subsystem. We do this even if 'force' is set, because there are some
1485 1543 * uses (such as a dedicated dump device) that even '-f' cannot
1486 1544 * override.
1487 1545 */
1488 1546 if (is_device_in_use(poolconfig, newroot, force, replacing, B_FALSE)) {
1489 1547 nvlist_free(newroot);
1490 1548 return (NULL);
1491 1549 }
1492 1550
1493 1551 /*
1494 1552 * Check the replication level of the given vdevs and report any errors
1495 1553 * found. We include the existing pool spec, if any, as we need to
1496 1554 * catch changes against the existing replication level.
1497 1555 */
1498 1556 if (check_rep && check_replication(poolconfig, newroot) != 0) {
1499 1557 nvlist_free(newroot);
1500 1558 return (NULL);
1501 1559 }
1502 1560
1503 1561 /*
1504 1562 * Run through the vdev specification and label any whole disks found.
1505 1563 */
1506 1564 if (!dryrun && make_disks(zhp, newroot, boot_type, boot_size) != 0) {
1507 1565 nvlist_free(newroot);
1508 1566 return (NULL);
1509 1567 }
1510 1568
1511 1569 return (newroot);
1512 1570 }
|
↓ open down ↓ |
141 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX