4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
25 */
26
27 /*
28 * Virtual Device Labels
29 * ---------------------
30 *
31 * The vdev label serves several distinct purposes:
32 *
33 * 1. Uniquely identify this device as part of a ZFS pool and confirm its
34 * identity within the pool.
35 *
36 * 2. Verify that all the devices given in a configuration are present
37 * within the pool.
38 *
39 * 3. Determine the uberblock for the pool.
40 *
41 * 4. In case of an import operation, determine the configuration of the
42 * toplevel vdev of which it is a part.
43 *
44 * 5. If an import operation cannot find all the devices in the pool,
126 * features_for_read
127 * An nvlist of the features necessary for reading the MOS.
128 *
129 * Each leaf device label also contains the following:
130 *
131 * top_guid Unique ID for top-level vdev in which this is contained
132 * guid Unique ID for the leaf vdev
133 *
134 * The 'vs' configuration follows the format described in 'spa_config.c'.
135 */
136
137 #include <sys/zfs_context.h>
138 #include <sys/spa.h>
139 #include <sys/spa_impl.h>
140 #include <sys/dmu.h>
141 #include <sys/zap.h>
142 #include <sys/vdev.h>
143 #include <sys/vdev_impl.h>
144 #include <sys/uberblock_impl.h>
145 #include <sys/metaslab.h>
146 #include <sys/metaslab_impl.h>
147 #include <sys/zio.h>
148 #include <sys/dsl_scan.h>
149 #include <sys/abd.h>
150 #include <sys/fs/zfs.h>
151
152 /*
153 * Basic routines to read and write from a vdev label.
154 * Used throughout the rest of this file.
155 */
156 uint64_t
157 vdev_label_offset(uint64_t psize, int l, uint64_t offset)
158 {
159 ASSERT(offset < sizeof (vdev_label_t));
160 ASSERT(P2PHASE_TYPED(psize, sizeof (vdev_label_t), uint64_t) == 0);
161
162 return (offset + l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
163 0 : psize - VDEV_LABELS * sizeof (vdev_label_t)));
164 }
165
166 /*
200 ASSERT(spa_config_held(zio->io_spa, SCL_ALL, RW_WRITER) == SCL_ALL ||
201 (spa_config_held(zio->io_spa, SCL_CONFIG | SCL_STATE, RW_READER) ==
202 (SCL_CONFIG | SCL_STATE) &&
203 dsl_pool_sync_context(spa_get_dsl(zio->io_spa))));
204 ASSERT(flags & ZIO_FLAG_CONFIG_WRITER);
205
206 zio_nowait(zio_write_phys(zio, vd,
207 vdev_label_offset(vd->vdev_psize, l, offset),
208 size, buf, ZIO_CHECKSUM_LABEL, done, private,
209 ZIO_PRIORITY_SYNC_WRITE, flags, B_TRUE));
210 }
211
212 /*
213 * Generate the nvlist representing this vdev's config.
214 */
215 nvlist_t *
216 vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
217 vdev_config_flag_t flags)
218 {
219 nvlist_t *nv = NULL;
220 vdev_indirect_config_t *vic = &vd->vdev_indirect_config;
221
222 nv = fnvlist_alloc();
223
224 fnvlist_add_string(nv, ZPOOL_CONFIG_TYPE, vd->vdev_ops->vdev_op_type);
225 if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)))
226 fnvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id);
227 fnvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, vd->vdev_guid);
228
229 if (vd->vdev_path != NULL)
230 fnvlist_add_string(nv, ZPOOL_CONFIG_PATH, vd->vdev_path);
231
232 if (vd->vdev_devid != NULL)
233 fnvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vd->vdev_devid);
234
235 if (vd->vdev_physpath != NULL)
236 fnvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
237 vd->vdev_physpath);
238
239 if (vd->vdev_fru != NULL)
240 fnvlist_add_string(nv, ZPOOL_CONFIG_FRU, vd->vdev_fru);
248 * into a crufty old storage pool.
249 */
250 ASSERT(vd->vdev_nparity == 1 ||
251 (vd->vdev_nparity <= 2 &&
252 spa_version(spa) >= SPA_VERSION_RAIDZ2) ||
253 (vd->vdev_nparity <= 3 &&
254 spa_version(spa) >= SPA_VERSION_RAIDZ3));
255
256 /*
257 * Note that we'll add the nparity tag even on storage pools
258 * that only support a single parity device -- older software
259 * will just ignore it.
260 */
261 fnvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, vd->vdev_nparity);
262 }
263
264 if (vd->vdev_wholedisk != -1ULL)
265 fnvlist_add_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
266 vd->vdev_wholedisk);
267
268 if (vd->vdev_not_present && !(flags & VDEV_CONFIG_MISSING))
269 fnvlist_add_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, 1);
270
271 if (vd->vdev_isspare)
272 fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPARE, 1);
273
274 if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) &&
275 vd == vd->vdev_top) {
276 fnvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY,
277 vd->vdev_ms_array);
278 fnvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT,
279 vd->vdev_ms_shift);
280 fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASHIFT, vd->vdev_ashift);
281 fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASIZE,
282 vd->vdev_asize);
283 fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_LOG, vd->vdev_islog);
284 if (vd->vdev_removing) {
285 fnvlist_add_uint64(nv, ZPOOL_CONFIG_REMOVING,
286 vd->vdev_removing);
287 }
288 }
289
290 if (vd->vdev_dtl_sm != NULL) {
291 fnvlist_add_uint64(nv, ZPOOL_CONFIG_DTL,
292 space_map_object(vd->vdev_dtl_sm));
293 }
294
295 if (vic->vic_mapping_object != 0) {
296 fnvlist_add_uint64(nv, ZPOOL_CONFIG_INDIRECT_OBJECT,
297 vic->vic_mapping_object);
298 }
299
300 if (vic->vic_births_object != 0) {
301 fnvlist_add_uint64(nv, ZPOOL_CONFIG_INDIRECT_BIRTHS,
302 vic->vic_births_object);
303 }
304
305 if (vic->vic_prev_indirect_vdev != UINT64_MAX) {
306 fnvlist_add_uint64(nv, ZPOOL_CONFIG_PREV_INDIRECT_VDEV,
307 vic->vic_prev_indirect_vdev);
308 }
309
310 if (vd->vdev_crtxg)
311 fnvlist_add_uint64(nv, ZPOOL_CONFIG_CREATE_TXG, vd->vdev_crtxg);
312
313 if (flags & VDEV_CONFIG_MOS) {
314 if (vd->vdev_leaf_zap != 0) {
315 ASSERT(vd->vdev_ops->vdev_op_leaf);
316 fnvlist_add_uint64(nv, ZPOOL_CONFIG_VDEV_LEAF_ZAP,
317 vd->vdev_leaf_zap);
318 }
319
320 if (vd->vdev_top_zap != 0) {
321 ASSERT(vd == vd->vdev_top);
322 fnvlist_add_uint64(nv, ZPOOL_CONFIG_VDEV_TOP_ZAP,
323 vd->vdev_top_zap);
324 }
325 }
326
327 if (getstats) {
328 vdev_stat_t vs;
329
330 vdev_get_stats(vd, &vs);
331 fnvlist_add_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
332 (uint64_t *)&vs, sizeof (vs) / sizeof (uint64_t));
333
334 /* provide either current or previous scan information */
335 pool_scan_stat_t ps;
336 if (spa_scan_get_stats(spa, &ps) == 0) {
337 fnvlist_add_uint64_array(nv,
338 ZPOOL_CONFIG_SCAN_STATS, (uint64_t *)&ps,
339 sizeof (pool_scan_stat_t) / sizeof (uint64_t));
340 }
341
342 pool_removal_stat_t prs;
343 if (spa_removal_get_stats(spa, &prs) == 0) {
344 fnvlist_add_uint64_array(nv,
345 ZPOOL_CONFIG_REMOVAL_STATS, (uint64_t *)&prs,
346 sizeof (prs) / sizeof (uint64_t));
347 }
348
349 /*
350 * Note: this can be called from open context
351 * (spa_get_stats()), so we need the rwlock to prevent
352 * the mapping from being changed by condensing.
353 */
354 rw_enter(&vd->vdev_indirect_rwlock, RW_READER);
355 if (vd->vdev_indirect_mapping != NULL) {
356 ASSERT(vd->vdev_indirect_births != NULL);
357 vdev_indirect_mapping_t *vim =
358 vd->vdev_indirect_mapping;
359 fnvlist_add_uint64(nv, ZPOOL_CONFIG_INDIRECT_SIZE,
360 vdev_indirect_mapping_size(vim));
361 }
362 rw_exit(&vd->vdev_indirect_rwlock);
363 if (vd->vdev_mg != NULL &&
364 vd->vdev_mg->mg_fragmentation != ZFS_FRAG_INVALID) {
365 /*
366 * Compute approximately how much memory would be used
367 * for the indirect mapping if this device were to
368 * be removed.
369 *
370 * Note: If the frag metric is invalid, then not
371 * enough metaslabs have been converted to have
372 * histograms.
373 */
374 uint64_t seg_count = 0;
375
376 /*
377 * There are the same number of allocated segments
378 * as free segments, so we will have at least one
379 * entry per free segment.
380 */
381 for (int i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
382 seg_count += vd->vdev_mg->mg_histogram[i];
383 }
384
385 /*
386 * The maximum length of a mapping is SPA_MAXBLOCKSIZE,
387 * so we need at least one entry per SPA_MAXBLOCKSIZE
388 * of allocated data.
389 */
390 seg_count += vd->vdev_stat.vs_alloc / SPA_MAXBLOCKSIZE;
391
392 fnvlist_add_uint64(nv, ZPOOL_CONFIG_INDIRECT_SIZE,
393 seg_count *
394 sizeof (vdev_indirect_mapping_entry_phys_t));
395 }
396 }
397
398 if (!vd->vdev_ops->vdev_op_leaf) {
399 nvlist_t **child;
400 int c, idx;
401
402 ASSERT(!vd->vdev_ishole);
403
404 child = kmem_alloc(vd->vdev_children * sizeof (nvlist_t *),
405 KM_SLEEP);
406
407 for (c = 0, idx = 0; c < vd->vdev_children; c++) {
408 vdev_t *cvd = vd->vdev_child[c];
409
410 /*
411 * If we're generating an nvlist of removing
412 * vdevs then skip over any device which is
413 * not being removed.
414 */
415 if ((flags & VDEV_CONFIG_REMOVING) &&
416 !cvd->vdev_removing)
417 continue;
449 if (vd->vdev_ishole)
450 fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_HOLE, B_TRUE);
451
452 switch (vd->vdev_stat.vs_aux) {
453 case VDEV_AUX_ERR_EXCEEDED:
454 aux = "err_exceeded";
455 break;
456
457 case VDEV_AUX_EXTERNAL:
458 aux = "external";
459 break;
460 }
461
462 if (aux != NULL)
463 fnvlist_add_string(nv, ZPOOL_CONFIG_AUX_STATE, aux);
464
465 if (vd->vdev_splitting && vd->vdev_orig_guid != 0LL) {
466 fnvlist_add_uint64(nv, ZPOOL_CONFIG_ORIG_GUID,
467 vd->vdev_orig_guid);
468 }
469 }
470
471 return (nv);
472 }
473
474 /*
475 * Generate a view of the top-level vdevs. If we currently have holes
476 * in the namespace, then generate an array which contains a list of holey
477 * vdevs. Additionally, add the number of top-level children that currently
478 * exist.
479 */
480 void
481 vdev_top_config_generate(spa_t *spa, nvlist_t *config)
482 {
483 vdev_t *rvd = spa->spa_root_vdev;
484 uint64_t *array;
485 uint_t c, idx;
486
487 array = kmem_alloc(rvd->vdev_children * sizeof (uint64_t), KM_SLEEP);
488
489 for (c = 0, idx = 0; c < rvd->vdev_children; c++) {
490 vdev_t *tvd = rvd->vdev_child[c];
491
492 if (tvd->vdev_ishole) {
493 array[idx++] = c;
494 }
495 }
496
497 if (idx) {
498 VERIFY(nvlist_add_uint64_array(config, ZPOOL_CONFIG_HOLE_ARRAY,
499 array, idx) == 0);
500 }
501
502 VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN,
503 rvd->vdev_children) == 0);
504
505 kmem_free(array, rvd->vdev_children * sizeof (uint64_t));
506 }
507
508 /*
509 * Returns the configuration from the label of the given vdev. For vdevs
510 * which don't have a txg value stored on their label (i.e. spares/cache)
511 * or have not been completely initialized (txg = 0) just return
512 * the configuration from the first valid label we find. Otherwise,
513 * find the most up-to-date label that does not exceed the specified
514 * 'txg' value.
515 */
1040 ASSERT(ub);
1041 ASSERT(config);
1042
1043 bzero(ub, sizeof (uberblock_t));
1044 *config = NULL;
1045
1046 cb.ubl_ubbest = ub;
1047 cb.ubl_vd = NULL;
1048
1049 spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
1050 zio = zio_root(spa, NULL, &cb, flags);
1051 vdev_uberblock_load_impl(zio, rvd, flags, &cb);
1052 (void) zio_wait(zio);
1053
1054 /*
1055 * It's possible that the best uberblock was discovered on a label
1056 * that has a configuration which was written in a future txg.
1057 * Search all labels on this vdev to find the configuration that
1058 * matches the txg for our uberblock.
1059 */
1060 if (cb.ubl_vd != NULL) {
1061 vdev_dbgmsg(cb.ubl_vd, "best uberblock found for spa %s. "
1062 "txg %llu", spa->spa_name, (u_longlong_t)ub->ub_txg);
1063
1064 *config = vdev_label_read_config(cb.ubl_vd, ub->ub_txg);
1065 if (*config == NULL && spa->spa_extreme_rewind) {
1066 vdev_dbgmsg(cb.ubl_vd, "failed to read label config. "
1067 "Trying again without txg restrictions.");
1068 *config = vdev_label_read_config(cb.ubl_vd, UINT64_MAX);
1069 }
1070 if (*config == NULL) {
1071 vdev_dbgmsg(cb.ubl_vd, "failed to read label config");
1072 }
1073 }
1074 spa_config_exit(spa, SCL_ALL, FTAG);
1075 }
1076
1077 /*
1078 * On success, increment root zio's count of good writes.
1079 * We only get credit for writes to known-visible vdevs; see spa_vdev_add().
1080 */
1081 static void
1082 vdev_uberblock_sync_done(zio_t *zio)
1083 {
1084 uint64_t *good_writes = zio->io_private;
1085
1086 if (zio->io_error == 0 && zio->io_vd->vdev_top->vdev_ms_array != 0)
1087 atomic_inc_64(good_writes);
1088 }
1089
1090 /*
1091 * Write the uberblock to all labels of all leaves of the specified vdev.
1092 */
1093 static void
1094 vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, int flags)
1095 {
1096 for (uint64_t c = 0; c < vd->vdev_children; c++)
1097 vdev_uberblock_sync(zio, ub, vd->vdev_child[c], flags);
1098
1099 if (!vd->vdev_ops->vdev_op_leaf)
1100 return;
1101
1102 if (!vdev_writeable(vd))
1103 return;
1104
1105 int n = ub->ub_txg & (VDEV_UBERBLOCK_COUNT(vd) - 1);
1106
1107 /* Copy the uberblock_t into the ABD */
1108 abd_t *ub_abd = abd_alloc_for_io(VDEV_UBERBLOCK_SIZE(vd), B_TRUE);
1109 abd_zero(ub_abd, VDEV_UBERBLOCK_SIZE(vd));
1110 abd_copy_from_buf(ub_abd, ub, sizeof (uberblock_t));
1111
1112 for (int l = 0; l < VDEV_LABELS; l++)
1113 vdev_label_write(zio, vd, l, ub_abd,
1114 VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd),
1115 vdev_uberblock_sync_done, zio->io_private,
1116 flags | ZIO_FLAG_DONT_PROPAGATE);
1123 vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags)
1124 {
1125 spa_t *spa = svd[0]->vdev_spa;
1126 zio_t *zio;
1127 uint64_t good_writes = 0;
1128
1129 zio = zio_root(spa, NULL, &good_writes, flags);
1130
1131 for (int v = 0; v < svdcount; v++)
1132 vdev_uberblock_sync(zio, ub, svd[v], flags);
1133
1134 (void) zio_wait(zio);
1135
1136 /*
1137 * Flush the uberblocks to disk. This ensures that the odd labels
1138 * are no longer needed (because the new uberblocks and the even
1139 * labels are safely on disk), so it is safe to overwrite them.
1140 */
1141 zio = zio_root(spa, NULL, NULL, flags);
1142
1143 for (int v = 0; v < svdcount; v++) {
1144 if (vdev_writeable(svd[v])) {
1145 zio_flush(zio, svd[v]);
1146 }
1147 }
1148
1149 (void) zio_wait(zio);
1150
1151 return (good_writes >= 1 ? 0 : EIO);
1152 }
1153
1154 /*
1155 * On success, increment the count of good writes for our top-level vdev.
1156 */
1157 static void
1158 vdev_label_sync_done(zio_t *zio)
1159 {
1160 uint64_t *good_writes = zio->io_private;
1161
1162 if (zio->io_error == 0)
1163 atomic_inc_64(good_writes);
1164 }
1165
1166 /*
1167 * If there weren't enough good writes, indicate failure to the parent.
|
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
25 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
26 */
27
28 /*
29 * Virtual Device Labels
30 * ---------------------
31 *
32 * The vdev label serves several distinct purposes:
33 *
34 * 1. Uniquely identify this device as part of a ZFS pool and confirm its
35 * identity within the pool.
36 *
37 * 2. Verify that all the devices given in a configuration are present
38 * within the pool.
39 *
40 * 3. Determine the uberblock for the pool.
41 *
42 * 4. In case of an import operation, determine the configuration of the
43 * toplevel vdev of which it is a part.
44 *
45 * 5. If an import operation cannot find all the devices in the pool,
127 * features_for_read
128 * An nvlist of the features necessary for reading the MOS.
129 *
130 * Each leaf device label also contains the following:
131 *
132 * top_guid Unique ID for top-level vdev in which this is contained
133 * guid Unique ID for the leaf vdev
134 *
135 * The 'vs' configuration follows the format described in 'spa_config.c'.
136 */
137
138 #include <sys/zfs_context.h>
139 #include <sys/spa.h>
140 #include <sys/spa_impl.h>
141 #include <sys/dmu.h>
142 #include <sys/zap.h>
143 #include <sys/vdev.h>
144 #include <sys/vdev_impl.h>
145 #include <sys/uberblock_impl.h>
146 #include <sys/metaslab.h>
147 #include <sys/zio.h>
148 #include <sys/dsl_scan.h>
149 #include <sys/abd.h>
150 #include <sys/fs/zfs.h>
151
152 /*
153 * Basic routines to read and write from a vdev label.
154 * Used throughout the rest of this file.
155 */
156 uint64_t
157 vdev_label_offset(uint64_t psize, int l, uint64_t offset)
158 {
159 ASSERT(offset < sizeof (vdev_label_t));
160 ASSERT(P2PHASE_TYPED(psize, sizeof (vdev_label_t), uint64_t) == 0);
161
162 return (offset + l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ?
163 0 : psize - VDEV_LABELS * sizeof (vdev_label_t)));
164 }
165
166 /*
200 ASSERT(spa_config_held(zio->io_spa, SCL_ALL, RW_WRITER) == SCL_ALL ||
201 (spa_config_held(zio->io_spa, SCL_CONFIG | SCL_STATE, RW_READER) ==
202 (SCL_CONFIG | SCL_STATE) &&
203 dsl_pool_sync_context(spa_get_dsl(zio->io_spa))));
204 ASSERT(flags & ZIO_FLAG_CONFIG_WRITER);
205
206 zio_nowait(zio_write_phys(zio, vd,
207 vdev_label_offset(vd->vdev_psize, l, offset),
208 size, buf, ZIO_CHECKSUM_LABEL, done, private,
209 ZIO_PRIORITY_SYNC_WRITE, flags, B_TRUE));
210 }
211
212 /*
213 * Generate the nvlist representing this vdev's config.
214 */
215 nvlist_t *
216 vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
217 vdev_config_flag_t flags)
218 {
219 nvlist_t *nv = NULL;
220
221 nv = fnvlist_alloc();
222
223 fnvlist_add_string(nv, ZPOOL_CONFIG_TYPE, vd->vdev_ops->vdev_op_type);
224 if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)))
225 fnvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id);
226 fnvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, vd->vdev_guid);
227
228 if (vd->vdev_path != NULL)
229 fnvlist_add_string(nv, ZPOOL_CONFIG_PATH, vd->vdev_path);
230
231 if (vd->vdev_devid != NULL)
232 fnvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vd->vdev_devid);
233
234 if (vd->vdev_physpath != NULL)
235 fnvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
236 vd->vdev_physpath);
237
238 if (vd->vdev_fru != NULL)
239 fnvlist_add_string(nv, ZPOOL_CONFIG_FRU, vd->vdev_fru);
247 * into a crufty old storage pool.
248 */
249 ASSERT(vd->vdev_nparity == 1 ||
250 (vd->vdev_nparity <= 2 &&
251 spa_version(spa) >= SPA_VERSION_RAIDZ2) ||
252 (vd->vdev_nparity <= 3 &&
253 spa_version(spa) >= SPA_VERSION_RAIDZ3));
254
255 /*
256 * Note that we'll add the nparity tag even on storage pools
257 * that only support a single parity device -- older software
258 * will just ignore it.
259 */
260 fnvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, vd->vdev_nparity);
261 }
262
263 if (vd->vdev_wholedisk != -1ULL)
264 fnvlist_add_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
265 vd->vdev_wholedisk);
266
267 if (vd->vdev_not_present)
268 fnvlist_add_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, 1);
269
270 if (vd->vdev_isspare)
271 fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPARE, 1);
272
273 if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) &&
274 vd == vd->vdev_top) {
275 fnvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY,
276 vd->vdev_ms_array);
277 fnvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT,
278 vd->vdev_ms_shift);
279 fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASHIFT, vd->vdev_ashift);
280 fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASIZE,
281 vd->vdev_asize);
282 fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_LOG, vd->vdev_islog);
283 fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPECIAL,
284 vd->vdev_isspecial);
285 if (vd->vdev_removing)
286 fnvlist_add_uint64(nv, ZPOOL_CONFIG_REMOVING,
287 vd->vdev_removing);
288 }
289
290 if (flags & VDEV_CONFIG_L2CACHE)
291 /* indicate that we support L2ARC persistency */
292 VERIFY(nvlist_add_boolean_value(nv,
293 ZPOOL_CONFIG_L2CACHE_PERSISTENT, B_TRUE) == 0);
294
295 fnvlist_add_boolean_value(nv, ZPOOL_CONFIG_IS_SSD, vd->vdev_is_ssd);
296
297 if (vd->vdev_dtl_sm != NULL) {
298 fnvlist_add_uint64(nv, ZPOOL_CONFIG_DTL,
299 space_map_object(vd->vdev_dtl_sm));
300 }
301
302 if (vd->vdev_crtxg)
303 fnvlist_add_uint64(nv, ZPOOL_CONFIG_CREATE_TXG, vd->vdev_crtxg);
304
305 if (flags & VDEV_CONFIG_MOS) {
306 if (vd->vdev_leaf_zap != 0) {
307 ASSERT(vd->vdev_ops->vdev_op_leaf);
308 fnvlist_add_uint64(nv, ZPOOL_CONFIG_VDEV_LEAF_ZAP,
309 vd->vdev_leaf_zap);
310 }
311
312 if (vd->vdev_top_zap != 0) {
313 ASSERT(vd == vd->vdev_top);
314 fnvlist_add_uint64(nv, ZPOOL_CONFIG_VDEV_TOP_ZAP,
315 vd->vdev_top_zap);
316 }
317 }
318
319 if (getstats) {
320 vdev_stat_t vs;
321 pool_scan_stat_t ps;
322
323 vdev_get_stats(vd, &vs);
324 fnvlist_add_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
325 (uint64_t *)&vs, sizeof (vs) / sizeof (uint64_t));
326
327 /* provide either current or previous scan information */
328 if (spa_scan_get_stats(spa, &ps) == 0) {
329 fnvlist_add_uint64_array(nv,
330 ZPOOL_CONFIG_SCAN_STATS, (uint64_t *)&ps,
331 sizeof (pool_scan_stat_t) / sizeof (uint64_t));
332 }
333 }
334
335 if (!vd->vdev_ops->vdev_op_leaf) {
336 nvlist_t **child;
337 int c, idx;
338
339 ASSERT(!vd->vdev_ishole);
340
341 child = kmem_alloc(vd->vdev_children * sizeof (nvlist_t *),
342 KM_SLEEP);
343
344 for (c = 0, idx = 0; c < vd->vdev_children; c++) {
345 vdev_t *cvd = vd->vdev_child[c];
346
347 /*
348 * If we're generating an nvlist of removing
349 * vdevs then skip over any device which is
350 * not being removed.
351 */
352 if ((flags & VDEV_CONFIG_REMOVING) &&
353 !cvd->vdev_removing)
354 continue;
386 if (vd->vdev_ishole)
387 fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_HOLE, B_TRUE);
388
389 switch (vd->vdev_stat.vs_aux) {
390 case VDEV_AUX_ERR_EXCEEDED:
391 aux = "err_exceeded";
392 break;
393
394 case VDEV_AUX_EXTERNAL:
395 aux = "external";
396 break;
397 }
398
399 if (aux != NULL)
400 fnvlist_add_string(nv, ZPOOL_CONFIG_AUX_STATE, aux);
401
402 if (vd->vdev_splitting && vd->vdev_orig_guid != 0LL) {
403 fnvlist_add_uint64(nv, ZPOOL_CONFIG_ORIG_GUID,
404 vd->vdev_orig_guid);
405 }
406
407 /* grab per-leaf-vdev trim stats */
408 if (getstats) {
409 fnvlist_add_uint64(nv, ZPOOL_CONFIG_TRIM_PROG,
410 vd->vdev_trim_prog);
411 }
412 }
413
414 return (nv);
415 }
416
417 /*
418 * Generate a view of the top-level vdevs. If we currently have holes
419 * in the namespace, then generate an array which contains a list of holey
420 * vdevs. Additionally, add the number of top-level children that currently
421 * exist.
422 */
423 void
424 vdev_top_config_generate(spa_t *spa, nvlist_t *config)
425 {
426 vdev_t *rvd = spa->spa_root_vdev;
427 uint64_t *array;
428 uint_t c, idx;
429
430 array = kmem_alloc(rvd->vdev_children * sizeof (uint64_t), KM_SLEEP);
431
432 for (c = 0, idx = 0; c < rvd->vdev_children; c++) {
433 vdev_t *tvd = rvd->vdev_child[c];
434
435 if (tvd->vdev_ishole)
436 array[idx++] = c;
437 }
438
439 if (idx) {
440 VERIFY(nvlist_add_uint64_array(config, ZPOOL_CONFIG_HOLE_ARRAY,
441 array, idx) == 0);
442 }
443
444 VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN,
445 rvd->vdev_children) == 0);
446
447 kmem_free(array, rvd->vdev_children * sizeof (uint64_t));
448 }
449
450 /*
451 * Returns the configuration from the label of the given vdev. For vdevs
452 * which don't have a txg value stored on their label (i.e. spares/cache)
453 * or have not been completely initialized (txg = 0) just return
454 * the configuration from the first valid label we find. Otherwise,
455 * find the most up-to-date label that does not exceed the specified
456 * 'txg' value.
457 */
982 ASSERT(ub);
983 ASSERT(config);
984
985 bzero(ub, sizeof (uberblock_t));
986 *config = NULL;
987
988 cb.ubl_ubbest = ub;
989 cb.ubl_vd = NULL;
990
991 spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
992 zio = zio_root(spa, NULL, &cb, flags);
993 vdev_uberblock_load_impl(zio, rvd, flags, &cb);
994 (void) zio_wait(zio);
995
996 /*
997 * It's possible that the best uberblock was discovered on a label
998 * that has a configuration which was written in a future txg.
999 * Search all labels on this vdev to find the configuration that
1000 * matches the txg for our uberblock.
1001 */
1002 if (cb.ubl_vd != NULL)
1003 *config = vdev_label_read_config(cb.ubl_vd, ub->ub_txg);
1004 spa_config_exit(spa, SCL_ALL, FTAG);
1005 }
1006
1007 /*
1008 * On success, increment root zio's count of good writes.
1009 * We only get credit for writes to known-visible vdevs; see spa_vdev_add().
1010 */
1011 static void
1012 vdev_uberblock_sync_done(zio_t *zio)
1013 {
1014 uint64_t *good_writes = zio->io_private;
1015
1016 if (zio->io_error == 0 && zio->io_vd->vdev_top->vdev_ms_array != 0)
1017 atomic_inc_64(good_writes);
1018 }
1019
1020 /*
1021 * Write the uberblock to all labels of all leaves of the specified vdev.
1022 */
1023 static void
1024 vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, int flags)
1025 {
1026 for (int c = 0; c < vd->vdev_children; c++)
1027 vdev_uberblock_sync(zio, ub, vd->vdev_child[c], flags);
1028
1029 if (!vd->vdev_ops->vdev_op_leaf)
1030 return;
1031
1032 if (!vdev_writeable(vd))
1033 return;
1034
1035 int n = ub->ub_txg & (VDEV_UBERBLOCK_COUNT(vd) - 1);
1036
1037 /* Copy the uberblock_t into the ABD */
1038 abd_t *ub_abd = abd_alloc_for_io(VDEV_UBERBLOCK_SIZE(vd), B_TRUE);
1039 abd_zero(ub_abd, VDEV_UBERBLOCK_SIZE(vd));
1040 abd_copy_from_buf(ub_abd, ub, sizeof (uberblock_t));
1041
1042 for (int l = 0; l < VDEV_LABELS; l++)
1043 vdev_label_write(zio, vd, l, ub_abd,
1044 VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd),
1045 vdev_uberblock_sync_done, zio->io_private,
1046 flags | ZIO_FLAG_DONT_PROPAGATE);
1053 vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags)
1054 {
1055 spa_t *spa = svd[0]->vdev_spa;
1056 zio_t *zio;
1057 uint64_t good_writes = 0;
1058
1059 zio = zio_root(spa, NULL, &good_writes, flags);
1060
1061 for (int v = 0; v < svdcount; v++)
1062 vdev_uberblock_sync(zio, ub, svd[v], flags);
1063
1064 (void) zio_wait(zio);
1065
1066 /*
1067 * Flush the uberblocks to disk. This ensures that the odd labels
1068 * are no longer needed (because the new uberblocks and the even
1069 * labels are safely on disk), so it is safe to overwrite them.
1070 */
1071 zio = zio_root(spa, NULL, NULL, flags);
1072
1073 for (int v = 0; v < svdcount; v++)
1074 zio_flush(zio, svd[v]);
1075
1076 (void) zio_wait(zio);
1077
1078 return (good_writes >= 1 ? 0 : EIO);
1079 }
1080
1081 /*
1082 * On success, increment the count of good writes for our top-level vdev.
1083 */
1084 static void
1085 vdev_label_sync_done(zio_t *zio)
1086 {
1087 uint64_t *good_writes = zio->io_private;
1088
1089 if (zio->io_error == 0)
1090 atomic_inc_64(good_writes);
1091 }
1092
1093 /*
1094 * If there weren't enough good writes, indicate failure to the parent.
|