NEX-7397 Hotspare didn't kick in automatically when one of the drives in the pool went "Faulty" (is_ssd fix)
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
NEX-2846 Enable Automatic/Intelligent Hot Sparing capability
Reviewed by: Jeffry Molanus <jeffry.molanus@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
6414 vdev_config_sync could be simpler
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Approved by: Robert Mustacchi <rm@joyent.com>
6368 Remove superfluous statement
Reviewed-by: Ned Bass <bass6@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed by: Will Andrews <will@freebsd.org>
Approved by: Robert Mustacchi <rm@joyent.com>
6386 Fix function call with uninitialized value in vdev_inuse
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Approved by: Robert Mustacchi <rm@joyent.com>
6328 Fix cstyle errors in zfs codebase (fix studio)
6328 Fix cstyle errors in zfs codebase
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Alex Reece <alex@delphix.com>
Reviewed by: Richard Elling <Richard.Elling@RichardElling.com>
Reviewed by: Jorgen Lundman <lundman@lundman.net>
Approved by: Robert Mustacchi <rm@joyent.com>
NEX-3984 On-demand TRIM
Reviewed by: Alek Pinchuk <alek@nexenta.com>
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
Conflicts:
        usr/src/common/zfs/zpool_prop.c
        usr/src/uts/common/sys/fs/zfs.h
NEX-3541 Implement persistent L2ARC
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Reviewed by: Josef Sipek <josef.sipek@nexenta.com>
Conflicts:
        usr/src/uts/common/fs/zfs/sys/spa.h
4121 vdev_label_init should treat request as succeeded when pool is read only
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Saso Kiselkov <skiselkov.ml@gmail.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
Fixup merge results
re #12585 rb4049 ZFS++ work port - refactoring to improve separation of open/closed code, bug fixes, performance improvements - open code
Bug 11205: add missing libzfs_closed_stubs.c to fix opensource-only build.
ZFS plus work: special vdevs, cos, cos/vdev properties
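
The webrev below touches vdev_config_generate() in vdev_label.c. Two of the changes above surface new per-vdev metadata: NEX-7397/NEX-2846 add an is_ssd flag to each vdev's config nvlist so hot-spare selection can match media type, and NEX-3541 advertises persistent-L2ARC support on cache devices. As a rough illustration of how a consumer might use the new is_ssd key, here is a minimal, hypothetical sketch; the helper name and fallback policy are illustrative assumptions, not code from this change:

#include <libnvpair.h>
#include <sys/fs/zfs.h>	/* ZPOOL_CONFIG_IS_SSD, per the zfs.h conflict noted above */

/*
 * Hypothetical spare selection: prefer a spare whose media type
 * (SSD vs. rotational) matches the faulted vdev.  Illustrative only.
 */
static nvlist_t *
pick_matching_spare(nvlist_t *faulted, nvlist_t **spares, uint_t nspares)
{
	boolean_t want_ssd = B_FALSE;

	/* Configs written by older software may lack the key. */
	(void) nvlist_lookup_boolean_value(faulted, ZPOOL_CONFIG_IS_SSD,
	    &want_ssd);

	for (uint_t i = 0; i < nspares; i++) {
		boolean_t is_ssd = B_FALSE;

		(void) nvlist_lookup_boolean_value(spares[i],
		    ZPOOL_CONFIG_IS_SSD, &is_ssd);
		if (is_ssd == want_ssd)
			return (spares[i]);
	}

	/* No media-type match: fall back to any spare rather than none. */
	return (nspares > 0 ? spares[0] : NULL);
}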

--- old/usr/src/uts/common/fs/zfs/vdev_label.c
+++ new/usr/src/uts/common/fs/zfs/vdev_label.c
(13 lines elided)
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24      - * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
       24 + * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
       25 + * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  25   26   */
  26   27  
  27   28  /*
  28   29   * Virtual Device Labels
  29   30   * ---------------------
  30   31   *
  31   32   * The vdev label serves several distinct purposes:
  32   33   *
  33   34   *      1. Uniquely identify this device as part of a ZFS pool and confirm its
  34   35   *         identity within the pool.
(101 lines elided)
 136  137  
 137  138  #include <sys/zfs_context.h>
 138  139  #include <sys/spa.h>
 139  140  #include <sys/spa_impl.h>
 140  141  #include <sys/dmu.h>
 141  142  #include <sys/zap.h>
 142  143  #include <sys/vdev.h>
 143  144  #include <sys/vdev_impl.h>
 144  145  #include <sys/uberblock_impl.h>
 145  146  #include <sys/metaslab.h>
 146      -#include <sys/metaslab_impl.h>
 147  147  #include <sys/zio.h>
 148  148  #include <sys/dsl_scan.h>
 149  149  #include <sys/abd.h>
 150  150  #include <sys/fs/zfs.h>
 151  151  
 152  152  /*
 153  153   * Basic routines to read and write from a vdev label.
 154  154   * Used throughout the rest of this file.
 155  155   */
 156  156  uint64_t
(53 lines elided)
 210  210  }
 211  211  
 212  212  /*
 213  213   * Generate the nvlist representing this vdev's config.
 214  214   */
 215  215  nvlist_t *
 216  216  vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
 217  217      vdev_config_flag_t flags)
 218  218  {
 219  219          nvlist_t *nv = NULL;
 220      -        vdev_indirect_config_t *vic = &vd->vdev_indirect_config;
 221  220  
 222  221          nv = fnvlist_alloc();
 223  222  
 224  223          fnvlist_add_string(nv, ZPOOL_CONFIG_TYPE, vd->vdev_ops->vdev_op_type);
 225  224          if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)))
 226  225                  fnvlist_add_uint64(nv, ZPOOL_CONFIG_ID, vd->vdev_id);
 227  226          fnvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, vd->vdev_guid);
 228  227  
 229  228          if (vd->vdev_path != NULL)
 230  229                  fnvlist_add_string(nv, ZPOOL_CONFIG_PATH, vd->vdev_path);
(27 lines elided)
 258  257                   * that only support a single parity device -- older software
 259  258                   * will just ignore it.
 260  259                   */
 261  260                  fnvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, vd->vdev_nparity);
 262  261          }
 263  262  
 264  263          if (vd->vdev_wholedisk != -1ULL)
 265  264                  fnvlist_add_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
 266  265                      vd->vdev_wholedisk);
 267  266  
 268      -        if (vd->vdev_not_present && !(flags & VDEV_CONFIG_MISSING))
      267 +        if (vd->vdev_not_present)
 269  268                  fnvlist_add_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, 1);
 270  269  
 271  270          if (vd->vdev_isspare)
 272  271                  fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPARE, 1);
 273  272  
 274  273          if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE)) &&
 275  274              vd == vd->vdev_top) {
 276  275                  fnvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_ARRAY,
 277  276                      vd->vdev_ms_array);
 278  277                  fnvlist_add_uint64(nv, ZPOOL_CONFIG_METASLAB_SHIFT,
 279  278                      vd->vdev_ms_shift);
 280  279                  fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASHIFT, vd->vdev_ashift);
 281  280                  fnvlist_add_uint64(nv, ZPOOL_CONFIG_ASIZE,
 282  281                      vd->vdev_asize);
 283  282                  fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_LOG, vd->vdev_islog);
 284      -                if (vd->vdev_removing) {
      283 +                fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_SPECIAL,
      284 +                    vd->vdev_isspecial);
      285 +                if (vd->vdev_removing)
 285  286                          fnvlist_add_uint64(nv, ZPOOL_CONFIG_REMOVING,
 286  287                              vd->vdev_removing);
 287      -                }
 288  288          }
 289  289  
      290 +        if (flags & VDEV_CONFIG_L2CACHE)
      291 +                /* indicate that we support L2ARC persistency */
      292 +                VERIFY(nvlist_add_boolean_value(nv,
      293 +                    ZPOOL_CONFIG_L2CACHE_PERSISTENT, B_TRUE) == 0);
      294 +
      295 +        fnvlist_add_boolean_value(nv, ZPOOL_CONFIG_IS_SSD, vd->vdev_is_ssd);
      296 +
 290  297          if (vd->vdev_dtl_sm != NULL) {
 291  298                  fnvlist_add_uint64(nv, ZPOOL_CONFIG_DTL,
 292  299                      space_map_object(vd->vdev_dtl_sm));
 293  300          }
 294  301  
 295      -        if (vic->vic_mapping_object != 0) {
 296      -                fnvlist_add_uint64(nv, ZPOOL_CONFIG_INDIRECT_OBJECT,
 297      -                    vic->vic_mapping_object);
 298      -        }
 299      -
 300      -        if (vic->vic_births_object != 0) {
 301      -                fnvlist_add_uint64(nv, ZPOOL_CONFIG_INDIRECT_BIRTHS,
 302      -                    vic->vic_births_object);
 303      -        }
 304      -
 305      -        if (vic->vic_prev_indirect_vdev != UINT64_MAX) {
 306      -                fnvlist_add_uint64(nv, ZPOOL_CONFIG_PREV_INDIRECT_VDEV,
 307      -                    vic->vic_prev_indirect_vdev);
 308      -        }
 309      -
 310  302          if (vd->vdev_crtxg)
 311  303                  fnvlist_add_uint64(nv, ZPOOL_CONFIG_CREATE_TXG, vd->vdev_crtxg);
 312  304  
 313  305          if (flags & VDEV_CONFIG_MOS) {
 314  306                  if (vd->vdev_leaf_zap != 0) {
 315  307                          ASSERT(vd->vdev_ops->vdev_op_leaf);
 316  308                          fnvlist_add_uint64(nv, ZPOOL_CONFIG_VDEV_LEAF_ZAP,
 317  309                              vd->vdev_leaf_zap);
 318  310                  }
 319  311  
 320  312                  if (vd->vdev_top_zap != 0) {
 321  313                          ASSERT(vd == vd->vdev_top);
 322  314                          fnvlist_add_uint64(nv, ZPOOL_CONFIG_VDEV_TOP_ZAP,
 323  315                              vd->vdev_top_zap);
 324  316                  }
 325  317          }
 326  318  
 327  319          if (getstats) {
 328  320                  vdev_stat_t vs;
      321 +                pool_scan_stat_t ps;
 329  322  
 330  323                  vdev_get_stats(vd, &vs);
 331  324                  fnvlist_add_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
 332  325                      (uint64_t *)&vs, sizeof (vs) / sizeof (uint64_t));
 333  326  
 334  327                  /* provide either current or previous scan information */
 335      -                pool_scan_stat_t ps;
 336  328                  if (spa_scan_get_stats(spa, &ps) == 0) {
 337  329                          fnvlist_add_uint64_array(nv,
 338  330                              ZPOOL_CONFIG_SCAN_STATS, (uint64_t *)&ps,
 339  331                              sizeof (pool_scan_stat_t) / sizeof (uint64_t));
 340  332                  }
 341      -
 342      -                pool_removal_stat_t prs;
 343      -                if (spa_removal_get_stats(spa, &prs) == 0) {
 344      -                        fnvlist_add_uint64_array(nv,
 345      -                            ZPOOL_CONFIG_REMOVAL_STATS, (uint64_t *)&prs,
 346      -                            sizeof (prs) / sizeof (uint64_t));
 347      -                }
 348      -
 349      -                /*
 350      -                 * Note: this can be called from open context
 351      -                 * (spa_get_stats()), so we need the rwlock to prevent
 352      -                 * the mapping from being changed by condensing.
 353      -                 */
 354      -                rw_enter(&vd->vdev_indirect_rwlock, RW_READER);
 355      -                if (vd->vdev_indirect_mapping != NULL) {
 356      -                        ASSERT(vd->vdev_indirect_births != NULL);
 357      -                        vdev_indirect_mapping_t *vim =
 358      -                            vd->vdev_indirect_mapping;
 359      -                        fnvlist_add_uint64(nv, ZPOOL_CONFIG_INDIRECT_SIZE,
 360      -                            vdev_indirect_mapping_size(vim));
 361      -                }
 362      -                rw_exit(&vd->vdev_indirect_rwlock);
 363      -                if (vd->vdev_mg != NULL &&
 364      -                    vd->vdev_mg->mg_fragmentation != ZFS_FRAG_INVALID) {
 365      -                        /*
 366      -                         * Compute approximately how much memory would be used
 367      -                         * for the indirect mapping if this device were to
 368      -                         * be removed.
 369      -                         *
 370      -                         * Note: If the frag metric is invalid, then not
 371      -                         * enough metaslabs have been converted to have
 372      -                         * histograms.
 373      -                         */
 374      -                        uint64_t seg_count = 0;
 375      -
 376      -                        /*
 377      -                         * There are the same number of allocated segments
 378      -                         * as free segments, so we will have at least one
 379      -                         * entry per free segment.
 380      -                         */
 381      -                        for (int i = 0; i < RANGE_TREE_HISTOGRAM_SIZE; i++) {
 382      -                                seg_count += vd->vdev_mg->mg_histogram[i];
 383      -                        }
 384      -
 385      -                        /*
 386      -                         * The maximum length of a mapping is SPA_MAXBLOCKSIZE,
 387      -                         * so we need at least one entry per SPA_MAXBLOCKSIZE
 388      -                         * of allocated data.
 389      -                         */
 390      -                        seg_count += vd->vdev_stat.vs_alloc / SPA_MAXBLOCKSIZE;
 391      -
 392      -                        fnvlist_add_uint64(nv, ZPOOL_CONFIG_INDIRECT_SIZE,
 393      -                            seg_count *
 394      -                            sizeof (vdev_indirect_mapping_entry_phys_t));
 395      -                }
 396  333          }
 397  334  
 398  335          if (!vd->vdev_ops->vdev_op_leaf) {
 399  336                  nvlist_t **child;
 400  337                  int c, idx;
 401  338  
 402  339                  ASSERT(!vd->vdev_ishole);
 403  340  
 404  341                  child = kmem_alloc(vd->vdev_children * sizeof (nvlist_t *),
 405  342                      KM_SLEEP);
(53 lines elided)
 459  396                          break;
 460  397                  }
 461  398  
 462  399                  if (aux != NULL)
 463  400                          fnvlist_add_string(nv, ZPOOL_CONFIG_AUX_STATE, aux);
 464  401  
 465  402                  if (vd->vdev_splitting && vd->vdev_orig_guid != 0LL) {
 466  403                          fnvlist_add_uint64(nv, ZPOOL_CONFIG_ORIG_GUID,
 467  404                              vd->vdev_orig_guid);
 468  405                  }
      406 +
      407 +                /* grab per-leaf-vdev trim stats */
      408 +                if (getstats) {
      409 +                        fnvlist_add_uint64(nv, ZPOOL_CONFIG_TRIM_PROG,
      410 +                            vd->vdev_trim_prog);
      411 +                }
 469  412          }
 470  413  
 471  414          return (nv);
 472  415  }
 473  416  
 474  417  /*
 475  418   * Generate a view of the top-level vdevs.  If we currently have holes
 476  419   * in the namespace, then generate an array which contains a list of holey
 477  420   * vdevs.  Additionally, add the number of top-level children that currently
 478  421   * exist.
(3 lines elided)
 482  425  {
 483  426          vdev_t *rvd = spa->spa_root_vdev;
 484  427          uint64_t *array;
 485  428          uint_t c, idx;
 486  429  
 487  430          array = kmem_alloc(rvd->vdev_children * sizeof (uint64_t), KM_SLEEP);
 488  431  
 489  432          for (c = 0, idx = 0; c < rvd->vdev_children; c++) {
 490  433                  vdev_t *tvd = rvd->vdev_child[c];
 491  434  
 492      -                if (tvd->vdev_ishole) {
      435 +                if (tvd->vdev_ishole)
 493  436                          array[idx++] = c;
 494      -                }
 495  437          }
 496  438  
 497  439          if (idx) {
 498  440                  VERIFY(nvlist_add_uint64_array(config, ZPOOL_CONFIG_HOLE_ARRAY,
 499  441                      array, idx) == 0);
 500  442          }
 501  443  
 502  444          VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN,
 503  445              rvd->vdev_children) == 0);
 504  446  
(545 lines elided)
1050  992          zio = zio_root(spa, NULL, &cb, flags);
1051  993          vdev_uberblock_load_impl(zio, rvd, flags, &cb);
1052  994          (void) zio_wait(zio);
1053  995  
1054  996          /*
1055  997           * It's possible that the best uberblock was discovered on a label
1056  998           * that has a configuration which was written in a future txg.
1057  999           * Search all labels on this vdev to find the configuration that
1058 1000           * matches the txg for our uberblock.
1059 1001           */
1060      -        if (cb.ubl_vd != NULL) {
1061      -                vdev_dbgmsg(cb.ubl_vd, "best uberblock found for spa %s. "
1062      -                    "txg %llu", spa->spa_name, (u_longlong_t)ub->ub_txg);
1063      -
     1002 +        if (cb.ubl_vd != NULL)
1064 1003                  *config = vdev_label_read_config(cb.ubl_vd, ub->ub_txg);
1065      -                if (*config == NULL && spa->spa_extreme_rewind) {
1066      -                        vdev_dbgmsg(cb.ubl_vd, "failed to read label config. "
1067      -                            "Trying again without txg restrictions.");
1068      -                        *config = vdev_label_read_config(cb.ubl_vd, UINT64_MAX);
1069      -                }
1070      -                if (*config == NULL) {
1071      -                        vdev_dbgmsg(cb.ubl_vd, "failed to read label config");
1072      -                }
1073      -        }
1074 1004          spa_config_exit(spa, SCL_ALL, FTAG);
1075 1005  }
1076 1006  
1077 1007  /*
1078 1008   * On success, increment root zio's count of good writes.
1079 1009   * We only get credit for writes to known-visible vdevs; see spa_vdev_add().
1080 1010   */
1081 1011  static void
1082 1012  vdev_uberblock_sync_done(zio_t *zio)
1083 1013  {
(2 lines elided)
1086 1016          if (zio->io_error == 0 && zio->io_vd->vdev_top->vdev_ms_array != 0)
1087 1017                  atomic_inc_64(good_writes);
1088 1018  }
1089 1019  
1090 1020  /*
1091 1021   * Write the uberblock to all labels of all leaves of the specified vdev.
1092 1022   */
1093 1023  static void
1094 1024  vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, int flags)
1095 1025  {
1096      -        for (uint64_t c = 0; c < vd->vdev_children; c++)
     1026 +        for (int c = 0; c < vd->vdev_children; c++)
1097 1027                  vdev_uberblock_sync(zio, ub, vd->vdev_child[c], flags);
1098 1028  
1099 1029          if (!vd->vdev_ops->vdev_op_leaf)
1100 1030                  return;
1101 1031  
1102 1032          if (!vdev_writeable(vd))
1103 1033                  return;
1104 1034  
1105 1035          int n = ub->ub_txg & (VDEV_UBERBLOCK_COUNT(vd) - 1);
1106 1036  
(26 lines elided)
1133 1063  
1134 1064          (void) zio_wait(zio);
1135 1065  
1136 1066          /*
1137 1067           * Flush the uberblocks to disk.  This ensures that the odd labels
1138 1068           * are no longer needed (because the new uberblocks and the even
1139 1069           * labels are safely on disk), so it is safe to overwrite them.
1140 1070           */
1141 1071          zio = zio_root(spa, NULL, NULL, flags);
1142 1072  
1143      -        for (int v = 0; v < svdcount; v++) {
1144      -                if (vdev_writeable(svd[v])) {
1145      -                        zio_flush(zio, svd[v]);
1146      -                }
1147      -        }
     1073 +        for (int v = 0; v < svdcount; v++)
     1074 +                zio_flush(zio, svd[v]);
1148 1075  
1149 1076          (void) zio_wait(zio);
1150 1077  
1151 1078          return (good_writes >= 1 ? 0 : EIO);
1152 1079  }
1153 1080  
1154 1081  /*
1155 1082   * On success, increment the count of good writes for our top-level vdev.
1156 1083   */
1157 1084  static void
(232 lines elided)
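
For context on how the new nvlist keys above would be read back, here is a minimal consumer sketch using standard libnvpair lookups; the function itself is illustrative and not part of this webrev:

#include <sys/types.h>
#include <stdio.h>
#include <libnvpair.h>
#include <sys/fs/zfs.h>

/*
 * Illustrative reader for two keys this webrev adds to the vdev config:
 * per-leaf TRIM progress (NEX-3984) and the persistent-L2ARC capability
 * flag (NEX-3541).  Both are optional, so failed lookups are not errors.
 */
static void
print_new_vdev_keys(nvlist_t *nv)
{
	uint64_t prog;
	boolean_t persistent;

	/* Present only when the config was generated with getstats. */
	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_TRIM_PROG, &prog) == 0)
		(void) printf("trim progress: %llu\n", (u_longlong_t)prog);

	/* Set only on L2ARC (cache) device configs. */
	if (nvlist_lookup_boolean_value(nv, ZPOOL_CONFIG_L2CACHE_PERSISTENT,
	    &persistent) == 0 && persistent)
		(void) printf("persistent L2ARC supported\n");
}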