Print this page
2619 asynchronous destruction of ZFS file systems
2747 SPA versioning with zfs feature flags
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <gwilson@delphix.com>
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Reviewed by: Dan Kruchinin <dan.kruchinin@gmail.com>
Approved by: Dan McDonald <danmcd@nexenta.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/spa.c
          +++ new/usr/src/uts/common/fs/zfs/spa.c
↓ open down ↓ 54 lines elided ↑ open up ↑
  55   55  #include <sys/dsl_dir.h>
  56   56  #include <sys/dsl_prop.h>
  57   57  #include <sys/dsl_synctask.h>
  58   58  #include <sys/fs/zfs.h>
  59   59  #include <sys/arc.h>
  60   60  #include <sys/callb.h>
  61   61  #include <sys/systeminfo.h>
  62   62  #include <sys/spa_boot.h>
  63   63  #include <sys/zfs_ioctl.h>
  64   64  #include <sys/dsl_scan.h>
       65 +#include <sys/zfeature.h>
  65   66  
  66   67  #ifdef  _KERNEL
  67   68  #include <sys/bootprops.h>
  68   69  #include <sys/callb.h>
  69   70  #include <sys/cpupart.h>
  70   71  #include <sys/pool.h>
  71   72  #include <sys/sysdc.h>
  72   73  #include <sys/zone.h>
  73   74  #endif  /* _KERNEL */
  74   75  
↓ open down ↓ 31 lines elided ↑ open up ↑
 106  107  const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
 107  108          /* ISSUE        ISSUE_HIGH      INTR            INTR_HIGH */
 108  109          { ZTI_ONE,      ZTI_NULL,       ZTI_ONE,        ZTI_NULL },
 109  110          { ZTI_FIX(8),   ZTI_NULL,       ZTI_BATCH,      ZTI_NULL },
 110  111          { ZTI_BATCH,    ZTI_FIX(5),     ZTI_FIX(8),     ZTI_FIX(5) },
 111  112          { ZTI_FIX(100), ZTI_NULL,       ZTI_ONE,        ZTI_NULL },
 112  113          { ZTI_ONE,      ZTI_NULL,       ZTI_ONE,        ZTI_NULL },
 113  114          { ZTI_ONE,      ZTI_NULL,       ZTI_ONE,        ZTI_NULL },
 114  115  };
 115  116  
      117 +static dsl_syncfunc_t spa_sync_version;
 116  118  static dsl_syncfunc_t spa_sync_props;
 117  119  static boolean_t spa_has_active_shared_spare(spa_t *spa);
 118  120  static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config,
 119  121      spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
 120  122      char **ereport);
 121  123  static void spa_vdev_resilver_done(spa_t *spa);
 122  124  
 123  125  uint_t          zio_taskq_batch_pct = 100;      /* 1 thread per cpu in pset */
 124  126  id_t            zio_taskq_psrset_bind = PS_NONE;
 125  127  boolean_t       zio_taskq_sysdc = B_TRUE;       /* use SDC scheduling class */
↓ open down ↓ 35 lines elided ↑ open up ↑
 161  163          nvlist_free(propval);
 162  164  }
 163  165  
 164  166  /*
 165  167   * Get property values from the spa configuration.
 166  168   */
 167  169  static void
 168  170  spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
 169  171  {
 170  172          vdev_t *rvd = spa->spa_root_vdev;
      173 +        dsl_pool_t *pool = spa->spa_dsl_pool;
 171  174          uint64_t size;
 172  175          uint64_t alloc;
 173  176          uint64_t space;
 174  177          uint64_t cap, version;
 175  178          zprop_source_t src = ZPROP_SRC_NONE;
 176  179          spa_config_dirent_t *dp;
 177  180  
 178  181          ASSERT(MUTEX_HELD(&spa->spa_props_lock));
 179  182  
 180  183          if (rvd != NULL) {
↓ open down ↓ 26 lines elided ↑ open up ↑
 207  210                      rvd->vdev_state, src);
 208  211  
 209  212                  version = spa_version(spa);
 210  213                  if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION))
 211  214                          src = ZPROP_SRC_DEFAULT;
 212  215                  else
 213  216                          src = ZPROP_SRC_LOCAL;
 214  217                  spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src);
 215  218          }
 216  219  
      220 +        if (pool != NULL) {
      221 +                dsl_dir_t *freedir = pool->dp_free_dir;
      222 +
      223 +                /*
      224 +                 * The $FREE directory was introduced in SPA_VERSION_DEADLISTS;
      225 +                 * when opening pools older than this version, freedir is NULL.
      226 +                 */
      227 +                if (freedir != NULL) {
      228 +                        spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL,
      229 +                            freedir->dd_phys->dd_used_bytes, src);
      230 +                } else {
      231 +                        spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING,
      232 +                            NULL, 0, src);
      233 +                }
      234 +        }
      235 +
 217  236          spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src);
 218  237  
 219  238          if (spa->spa_comment != NULL) {
 220  239                  spa_prop_add_list(*nvp, ZPOOL_PROP_COMMENT, spa->spa_comment,
 221  240                      0, ZPROP_SRC_LOCAL);
 222  241          }
 223  242  
 224  243          if (spa->spa_root != NULL)
 225  244                  spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root,
 226  245                      0, ZPROP_SRC_LOCAL);
↓ open down ↓ 119 lines elided ↑ open up ↑
 346  365  /*
 347  366   * Validate the given pool properties nvlist and modify the list
 348  367   * for the property values to be set.
 349  368   */
 350  369  static int
 351  370  spa_prop_validate(spa_t *spa, nvlist_t *props)
 352  371  {
 353  372          nvpair_t *elem;
 354  373          int error = 0, reset_bootfs = 0;
 355  374          uint64_t objnum;
      375 +        boolean_t has_feature = B_FALSE;
 356  376  
 357  377          elem = NULL;
 358  378          while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
 359      -                zpool_prop_t prop;
 360      -                char *propname, *strval;
 361  379                  uint64_t intval;
 362      -                objset_t *os;
 363      -                char *slash, *check;
      380 +                char *strval, *slash, *check, *fname;
      381 +                const char *propname = nvpair_name(elem);
      382 +                zpool_prop_t prop = zpool_name_to_prop(propname);
 364  383  
 365      -                propname = nvpair_name(elem);
      384 +                switch (prop) {
      385 +                case ZPROP_INVAL:
      386 +                        if (!zpool_prop_feature(propname)) {
      387 +                                error = EINVAL;
      388 +                                break;
      389 +                        }
 366  390  
 367      -                if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL)
 368      -                        return (EINVAL);
      391 +                        /*
      392 +                         * Sanitize the input.
      393 +                         */
      394 +                        if (nvpair_type(elem) != DATA_TYPE_UINT64) {
      395 +                                error = EINVAL;
      396 +                                break;
      397 +                        }
 369  398  
 370      -                switch (prop) {
      399 +                        if (nvpair_value_uint64(elem, &intval) != 0) {
      400 +                                error = EINVAL;
      401 +                                break;
      402 +                        }
      403 +
      404 +                        if (intval != 0) {
      405 +                                error = EINVAL;
      406 +                                break;
      407 +                        }
      408 +
      409 +                        fname = strchr(propname, '@') + 1;
      410 +                        if (zfeature_lookup_name(fname, NULL) != 0) {
      411 +                                error = EINVAL;
      412 +                                break;
      413 +                        }
      414 +
      415 +                        has_feature = B_TRUE;
      416 +                        break;
      417 +
 371  418                  case ZPOOL_PROP_VERSION:
 372  419                          error = nvpair_value_uint64(elem, &intval);
 373  420                          if (!error &&
 374      -                            (intval < spa_version(spa) || intval > SPA_VERSION))
      421 +                            (intval < spa_version(spa) ||
      422 +                            intval > SPA_VERSION_BEFORE_FEATURES ||
      423 +                            has_feature))
 375  424                                  error = EINVAL;
 376  425                          break;
 377  426  
 378  427                  case ZPOOL_PROP_DELEGATION:
 379  428                  case ZPOOL_PROP_AUTOREPLACE:
 380  429                  case ZPOOL_PROP_LISTSNAPS:
 381  430                  case ZPOOL_PROP_AUTOEXPAND:
 382  431                          error = nvpair_value_uint64(elem, &intval);
 383  432                          if (!error && intval > 1)
 384  433                                  error = EINVAL;
↓ open down ↓ 16 lines elided ↑ open up ↑
 401  450                          if (!vdev_is_bootable(spa->spa_root_vdev)) {
 402  451                                  error = ENOTSUP;
 403  452                                  break;
 404  453                          }
 405  454  
 406  455                          reset_bootfs = 1;
 407  456  
 408  457                          error = nvpair_value_string(elem, &strval);
 409  458  
 410  459                          if (!error) {
      460 +                                objset_t *os;
 411  461                                  uint64_t compress;
 412  462  
 413  463                                  if (strval == NULL || strval[0] == '\0') {
 414  464                                          objnum = zpool_prop_default_numeric(
 415  465                                              ZPOOL_PROP_BOOTFS);
 416  466                                          break;
 417  467                                  }
 418  468  
 419  469                                  if (error = dmu_objset_hold(strval, FTAG, &os))
 420  470                                          break;
↓ open down ↓ 129 lines elided ↑ open up ↑
 550  600  
 551  601          list_insert_head(&spa->spa_config_list, dp);
 552  602          if (need_sync)
 553  603                  spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
 554  604  }
 555  605  
 556  606  int
 557  607  spa_prop_set(spa_t *spa, nvlist_t *nvp)
 558  608  {
 559  609          int error;
 560      -        nvpair_t *elem;
      610 +        nvpair_t *elem = NULL;
 561  611          boolean_t need_sync = B_FALSE;
 562      -        zpool_prop_t prop;
 563  612  
 564  613          if ((error = spa_prop_validate(spa, nvp)) != 0)
 565  614                  return (error);
 566  615  
 567      -        elem = NULL;
 568  616          while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) {
 569      -                if ((prop = zpool_name_to_prop(
 570      -                    nvpair_name(elem))) == ZPROP_INVAL)
 571      -                        return (EINVAL);
      617 +                zpool_prop_t prop = zpool_name_to_prop(nvpair_name(elem));
 572  618  
 573  619                  if (prop == ZPOOL_PROP_CACHEFILE ||
 574  620                      prop == ZPOOL_PROP_ALTROOT ||
 575  621                      prop == ZPOOL_PROP_READONLY)
 576  622                          continue;
 577  623  
      624 +                if (prop == ZPOOL_PROP_VERSION || prop == ZPROP_INVAL) {
      625 +                        uint64_t ver;
      626 +
      627 +                        if (prop == ZPOOL_PROP_VERSION) {
      628 +                                VERIFY(nvpair_value_uint64(elem, &ver) == 0);
      629 +                        } else {
      630 +                                ASSERT(zpool_prop_feature(nvpair_name(elem)));
      631 +                                ver = SPA_VERSION_FEATURES;
      632 +                                need_sync = B_TRUE;
      633 +                        }
      634 +
      635 +                        /* Save time if the version is already set. */
      636 +                        if (ver == spa_version(spa))
      637 +                                continue;
      638 +
      639 +                        /*
      640 +                         * In addition to the pool directory object, we might
      641 +                         * create the pool properties object, the features for
      642 +                         * read object, the features for write object, or the
      643 +                         * feature descriptions object.
      644 +                         */
      645 +                        error = dsl_sync_task_do(spa_get_dsl(spa), NULL,
      646 +                            spa_sync_version, spa, &ver, 6);
      647 +                        if (error)
      648 +                                return (error);
      649 +                        continue;
      650 +                }
      651 +
 578  652                  need_sync = B_TRUE;
 579  653                  break;
 580  654          }
 581  655  
 582      -        if (need_sync)
      656 +        if (need_sync) {
 583  657                  return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
 584      -                    spa, nvp, 3));
 585      -        else
 586      -                return (0);
      658 +                    spa, nvp, 6));
      659 +        }
      660 +
      661 +        return (0);
 587  662  }
 588  663  
 589  664  /*
 590  665   * If the bootfs property value is dsobj, clear it.
 591  666   */
 592  667  void
 593  668  spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx)
 594  669  {
 595  670          if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) {
 596  671                  VERIFY(zap_remove(spa->spa_meta_objset,
↓ open down ↓ 1003 lines elided ↑ open up ↑
1600 1675  
1601 1676  static void
1602 1677  spa_load_verify_done(zio_t *zio)
1603 1678  {
1604 1679          blkptr_t *bp = zio->io_bp;
1605 1680          spa_load_error_t *sle = zio->io_private;
1606 1681          dmu_object_type_t type = BP_GET_TYPE(bp);
1607 1682          int error = zio->io_error;
1608 1683  
1609 1684          if (error) {
1610      -                if ((BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata) &&
     1685 +                if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) &&
1611 1686                      type != DMU_OT_INTENT_LOG)
1612 1687                          atomic_add_64(&sle->sle_meta_count, 1);
1613 1688                  else
1614 1689                          atomic_add_64(&sle->sle_data_count, 1);
1615 1690          }
1616 1691          zio_data_buf_free(zio->io_data, zio->io_size);
1617 1692  }
1618 1693  
1619 1694  /*ARGSUSED*/
1620 1695  static int
↓ open down ↓ 209 lines elided ↑ open up ↑
1830 1905                  error = EEXIST;
1831 1906          } else {
1832 1907                  spa->spa_config_guid = pool_guid;
1833 1908  
1834 1909                  if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT,
1835 1910                      &nvl) == 0) {
1836 1911                          VERIFY(nvlist_dup(nvl, &spa->spa_config_splitting,
1837 1912                              KM_SLEEP) == 0);
1838 1913                  }
1839 1914  
     1915 +                nvlist_free(spa->spa_load_info);
     1916 +                spa->spa_load_info = fnvlist_alloc();
     1917 +
1840 1918                  gethrestime(&spa->spa_loaded_ts);
1841 1919                  error = spa_load_impl(spa, pool_guid, config, state, type,
1842 1920                      mosconfig, &ereport);
1843 1921          }
1844 1922  
1845 1923          spa->spa_minref = refcount_count(&spa->spa_refcount);
1846 1924          if (error) {
1847 1925                  if (error != EEXIST) {
1848 1926                          spa->spa_loaded_ts.tv_sec = 0;
1849 1927                          spa->spa_loaded_ts.tv_nsec = 0;
↓ open down ↓ 12 lines elided ↑ open up ↑
1862 1940   * Load an existing storage pool, using the pool's builtin spa_config as a
1863 1941   * source of configuration information.
1864 1942   */
1865 1943  static int
1866 1944  spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
1867 1945      spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
1868 1946      char **ereport)
1869 1947  {
1870 1948          int error = 0;
1871 1949          nvlist_t *nvroot = NULL;
     1950 +        nvlist_t *label;
1872 1951          vdev_t *rvd;
1873 1952          uberblock_t *ub = &spa->spa_uberblock;
1874 1953          uint64_t children, config_cache_txg = spa->spa_config_txg;
1875 1954          int orig_mode = spa->spa_mode;
1876 1955          int parse;
1877 1956          uint64_t obj;
     1957 +        boolean_t missing_feat_write = B_FALSE;
1878 1958  
1879 1959          /*
1880 1960           * If this is an untrusted config, access the pool in read-only mode.
1881 1961           * This prevents things like resilvering recently removed devices.
1882 1962           */
1883 1963          if (!mosconfig)
1884 1964                  spa->spa_mode = FREAD;
1885 1965  
1886 1966          ASSERT(MUTEX_HELD(&spa_namespace_lock));
1887 1967  
↓ open down ↓ 59 lines elided ↑ open up ↑
1947 2027                  if (error != 0)
1948 2028                          return (error);
1949 2029  
1950 2030                  if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN)
1951 2031                          return (ENXIO);
1952 2032          }
1953 2033  
1954 2034          /*
1955 2035           * Find the best uberblock.
1956 2036           */
1957      -        vdev_uberblock_load(NULL, rvd, ub);
     2037 +        vdev_uberblock_load(rvd, ub, &label);
1958 2038  
1959 2039          /*
1960 2040           * If we weren't able to find a single valid uberblock, return failure.
1961 2041           */
1962      -        if (ub->ub_txg == 0)
     2042 +        if (ub->ub_txg == 0) {
     2043 +                nvlist_free(label);
1963 2044                  return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO));
     2045 +        }
1964 2046  
1965 2047          /*
1966      -         * If the pool is newer than the code, we can't open it.
     2048 +         * If the pool has an unsupported version we can't open it.
1967 2049           */
1968      -        if (ub->ub_version > SPA_VERSION)
     2050 +        if (!SPA_VERSION_IS_SUPPORTED(ub->ub_version)) {
     2051 +                nvlist_free(label);
1969 2052                  return (spa_vdev_err(rvd, VDEV_AUX_VERSION_NEWER, ENOTSUP));
     2053 +        }
1970 2054  
     2055 +        if (ub->ub_version >= SPA_VERSION_FEATURES) {
     2056 +                nvlist_t *features;
     2057 +
     2058 +                /*
     2059 +                 * If we weren't able to find what's necessary for reading the
     2060 +                 * MOS in the label, return failure.
     2061 +                 */
     2062 +                if (label == NULL || nvlist_lookup_nvlist(label,
     2063 +                    ZPOOL_CONFIG_FEATURES_FOR_READ, &features) != 0) {
     2064 +                        nvlist_free(label);
     2065 +                        return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA,
     2066 +                            ENXIO));
     2067 +                }
     2068 +
     2069 +                /*
     2070 +                 * Update our in-core representation with the definitive values
     2071 +                 * from the label.
     2072 +                 */
     2073 +                nvlist_free(spa->spa_label_features);
     2074 +                VERIFY(nvlist_dup(features, &spa->spa_label_features, 0) == 0);
     2075 +        }
     2076 +
     2077 +        nvlist_free(label);
     2078 +
1971 2079          /*
     2080 +         * Look through entries in the label nvlist's features_for_read. If
     2081 +         * there is a feature listed there which we don't understand then we
     2082 +         * cannot open the pool.
     2083 +         */
     2084 +        if (ub->ub_version >= SPA_VERSION_FEATURES) {
     2085 +                nvlist_t *unsup_feat;
     2086 +
     2087 +                VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) ==
     2088 +                    0);
     2089 +
     2090 +                for (nvpair_t *nvp = nvlist_next_nvpair(spa->spa_label_features,
     2091 +                    NULL); nvp != NULL;
     2092 +                    nvp = nvlist_next_nvpair(spa->spa_label_features, nvp)) {
     2093 +                        if (!zfeature_is_supported(nvpair_name(nvp))) {
     2094 +                                VERIFY(nvlist_add_string(unsup_feat,
     2095 +                                    nvpair_name(nvp), "") == 0);
     2096 +                        }
     2097 +                }
     2098 +
     2099 +                if (!nvlist_empty(unsup_feat)) {
     2100 +                        VERIFY(nvlist_add_nvlist(spa->spa_load_info,
     2101 +                            ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0);
     2102 +                        nvlist_free(unsup_feat);
     2103 +                        return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT,
     2104 +                            ENOTSUP));
     2105 +                }
     2106 +
     2107 +                nvlist_free(unsup_feat);
     2108 +        }
     2109 +
     2110 +        /*
1972 2111           * If the vdev guid sum doesn't match the uberblock, we have an
1973 2112           * incomplete configuration.  We first check to see if the pool
1974 2113           * is aware of the complete config (i.e ZPOOL_CONFIG_VDEV_CHILDREN).
1975 2114           * If it is, defer the vdev_guid_sum check till later so we
1976 2115           * can handle missing vdevs.
1977 2116           */
1978 2117          if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN,
1979 2118              &children) != 0 && mosconfig && type != SPA_IMPORT_ASSEMBLE &&
1980 2119              rvd->vdev_guid_sum != ub->ub_guid_sum)
1981 2120                  return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, ENXIO));
↓ open down ↓ 11 lines elided ↑ open up ↑
1993 2132           */
1994 2133          spa->spa_state = POOL_STATE_ACTIVE;
1995 2134          spa->spa_ubsync = spa->spa_uberblock;
1996 2135          spa->spa_verify_min_txg = spa->spa_extreme_rewind ?
1997 2136              TXG_INITIAL - 1 : spa_last_synced_txg(spa) - TXG_DEFER_SIZE - 1;
1998 2137          spa->spa_first_txg = spa->spa_last_ubsync_txg ?
1999 2138              spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1;
2000 2139          spa->spa_claim_max_txg = spa->spa_first_txg;
2001 2140          spa->spa_prev_software_version = ub->ub_software_version;
2002 2141  
2003      -        error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
     2142 +        error = dsl_pool_init(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
2004 2143          if (error)
2005 2144                  return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
2006 2145          spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset;
2007 2146  
2008 2147          if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object) != 0)
2009 2148                  return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
2010 2149  
     2150 +        if (spa_version(spa) >= SPA_VERSION_FEATURES) {
     2151 +                boolean_t missing_feat_read = B_FALSE;
     2152 +                nvlist_t *unsup_feat;
     2153 +
     2154 +                if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_READ,
     2155 +                    &spa->spa_feat_for_read_obj) != 0) {
     2156 +                        return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
     2157 +                }
     2158 +
     2159 +                if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_WRITE,
     2160 +                    &spa->spa_feat_for_write_obj) != 0) {
     2161 +                        return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
     2162 +                }
     2163 +
     2164 +                if (spa_dir_prop(spa, DMU_POOL_FEATURE_DESCRIPTIONS,
     2165 +                    &spa->spa_feat_desc_obj) != 0) {
     2166 +                        return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
     2167 +                }
     2168 +
     2169 +                VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) ==
     2170 +                    0);
     2171 +
     2172 +                if (!feature_is_supported(spa->spa_meta_objset,
     2173 +                    spa->spa_feat_for_read_obj, spa->spa_feat_desc_obj,
     2174 +                    unsup_feat))
     2175 +                        missing_feat_read = B_TRUE;
     2176 +
     2177 +                if (spa_writeable(spa) || state == SPA_LOAD_TRYIMPORT) {
     2178 +                        if (!feature_is_supported(spa->spa_meta_objset,
     2179 +                            spa->spa_feat_for_write_obj, spa->spa_feat_desc_obj,
     2180 +                            unsup_feat))
     2181 +                                missing_feat_write = B_TRUE;
     2182 +                }
     2183 +
     2184 +                if (!nvlist_empty(unsup_feat)) {
     2185 +                        VERIFY(nvlist_add_nvlist(spa->spa_load_info,
     2186 +                            ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0);
     2187 +                }
     2188 +
     2189 +                nvlist_free(unsup_feat);
     2190 +
     2191 +                if (!missing_feat_read) {
     2192 +                        fnvlist_add_boolean(spa->spa_load_info,
     2193 +                            ZPOOL_CONFIG_CAN_RDONLY);
     2194 +                }
     2195 +
     2196 +                /*
     2197 +                 * If the state is SPA_LOAD_TRYIMPORT, our objective is
     2198 +                 * twofold: to determine whether the pool is available for
     2199 +                 * import in read-write mode and (if it is not) whether the
     2200 +                 * pool is available for import in read-only mode. If the pool
     2201 +                 * is available for import in read-write mode, it is displayed
     2202 +                 * as available in userland; if it is not available for import
     2203 +                 * in read-only mode, it is displayed as unavailable in
     2204 +                 * userland. If the pool is available for import in read-only
     2205 +                 * mode but not read-write mode, it is displayed as unavailable
     2206 +                 * in userland with a special note that the pool is actually
     2207 +                 * available for open in read-only mode.
     2208 +                 *
     2209 +                 * As a result, if the state is SPA_LOAD_TRYIMPORT and we are
     2210 +                 * missing a feature for write, we must first determine whether
     2211 +                 * the pool can be opened read-only before returning to
     2212 +                 * userland in order to know whether to display the
     2213 +                 * above-mentioned note.
     2214 +                 */
     2215 +                if (missing_feat_read || (missing_feat_write &&
     2216 +                    spa_writeable(spa))) {
     2217 +                        return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT,
     2218 +                            ENOTSUP));
     2219 +                }
     2220 +        }
     2221 +
     2222 +        spa->spa_is_initializing = B_TRUE;
     2223 +        error = dsl_pool_open(spa->spa_dsl_pool);
     2224 +        spa->spa_is_initializing = B_FALSE;
     2225 +        if (error != 0)
     2226 +                return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
     2227 +
2011 2228          if (!mosconfig) {
2012 2229                  uint64_t hostid;
2013 2230                  nvlist_t *policy = NULL, *nvconfig;
2014 2231  
2015 2232                  if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0)
2016 2233                          return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
2017 2234  
2018 2235                  if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig,
2019 2236                      ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
2020 2237                          char *hostname;
↓ open down ↓ 197 lines elided ↑ open up ↑
2218 2435                          return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
2219 2436  
2220 2437                  if (!spa_config_valid(spa, nvconfig)) {
2221 2438                          nvlist_free(nvconfig);
2222 2439                          return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM,
2223 2440                              ENXIO));
2224 2441                  }
2225 2442                  nvlist_free(nvconfig);
2226 2443  
2227 2444                  /*
2228      -                 * Now that we've validate the config, check the state of the
     2445 +                 * Now that we've validated the config, check the state of the
2229 2446                   * root vdev.  If it can't be opened, it indicates one or
2230 2447                   * more toplevel vdevs are faulted.
2231 2448                   */
2232 2449                  if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN)
2233 2450                          return (ENXIO);
2234 2451  
2235 2452                  if (spa_check_logs(spa)) {
2236 2453                          *ereport = FM_EREPORT_ZFS_LOG_REPLAY;
2237 2454                          return (spa_vdev_err(rvd, VDEV_AUX_BAD_LOG, ENXIO));
2238 2455                  }
2239 2456          }
2240 2457  
     2458 +        if (missing_feat_write) {
     2459 +                ASSERT(state == SPA_LOAD_TRYIMPORT);
     2460 +
     2461 +                /*
     2462 +                 * At this point, we know that we can open the pool in
     2463 +                 * read-only mode but not read-write mode. We now have enough
     2464 +                 * information and can return to userland.
     2465 +                 */
     2466 +                return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, ENOTSUP));
     2467 +        }
     2468 +
2241 2469          /*
2242 2470           * We've successfully opened the pool, verify that we're ready
2243 2471           * to start pushing transactions.
2244 2472           */
2245 2473          if (state != SPA_LOAD_TRYIMPORT) {
2246 2474                  if (error = spa_load_verify(spa))
2247 2475                          return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA,
2248 2476                              error));
2249 2477          }
2250 2478  
↓ open down ↓ 89 lines elided ↑ open up ↑
2340 2568          spa_deactivate(spa);
2341 2569  
2342 2570          spa->spa_load_max_txg--;
2343 2571  
2344 2572          spa_activate(spa, mode);
2345 2573          spa_async_suspend(spa);
2346 2574  
2347 2575          return (spa_load(spa, state, SPA_IMPORT_EXISTING, mosconfig));
2348 2576  }
2349 2577  
     2578 +/*
     2579 + * If spa_load() fails this function will try loading prior txgs. If
     2580 + * 'state' is SPA_LOAD_RECOVER and one of these loads succeeds the pool
     2581 + * will be rewound to that txg. If 'state' is not SPA_LOAD_RECOVER this
     2582 + * function will not rewind the pool and will return the same error as
     2583 + * spa_load().
     2584 + */
2350 2585  static int
2351 2586  spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
2352 2587      uint64_t max_request, int rewind_flags)
2353 2588  {
     2589 +        nvlist_t *loadinfo = NULL;
2354 2590          nvlist_t *config = NULL;
2355 2591          int load_error, rewind_error;
2356 2592          uint64_t safe_rewind_txg;
2357 2593          uint64_t min_txg;
2358 2594  
2359 2595          if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) {
2360 2596                  spa->spa_load_max_txg = spa->spa_load_txg;
2361 2597                  spa_set_log_state(spa, SPA_LOG_CLEAR);
2362 2598          } else {
2363 2599                  spa->spa_load_max_txg = max_request;
↓ open down ↓ 8 lines elided ↑ open up ↑
2372 2608                  config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
2373 2609  
2374 2610          spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg;
2375 2611          spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp;
2376 2612  
2377 2613          if (rewind_flags & ZPOOL_NEVER_REWIND) {
2378 2614                  nvlist_free(config);
2379 2615                  return (load_error);
2380 2616          }
2381 2617  
2382      -        /* Price of rolling back is discarding txgs, including log */
2383      -        if (state == SPA_LOAD_RECOVER)
     2618 +        if (state == SPA_LOAD_RECOVER) {
     2619 +                /* Price of rolling back is discarding txgs, including log */
2384 2620                  spa_set_log_state(spa, SPA_LOG_CLEAR);
     2621 +        } else {
     2622 +                /*
     2623 +                 * If we aren't rolling back save the load info from our first
     2624 +                 * import attempt so that we can restore it after attempting
     2625 +                 * to rewind.
     2626 +                 */
     2627 +                loadinfo = spa->spa_load_info;
     2628 +                spa->spa_load_info = fnvlist_alloc();
     2629 +        }
2385 2630  
2386 2631          spa->spa_load_max_txg = spa->spa_last_ubsync_txg;
2387 2632          safe_rewind_txg = spa->spa_last_ubsync_txg - TXG_DEFER_SIZE;
2388 2633          min_txg = (rewind_flags & ZPOOL_EXTREME_REWIND) ?
2389 2634              TXG_INITIAL : safe_rewind_txg;
2390 2635  
2391 2636          /*
2392 2637           * Continue as long as we're finding errors, we're still within
2393 2638           * the acceptable rewind range, and we're still finding uberblocks
2394 2639           */
↓ open down ↓ 3 lines elided ↑ open up ↑
2398 2643                          spa->spa_extreme_rewind = B_TRUE;
2399 2644                  rewind_error = spa_load_retry(spa, state, mosconfig);
2400 2645          }
2401 2646  
2402 2647          spa->spa_extreme_rewind = B_FALSE;
2403 2648          spa->spa_load_max_txg = UINT64_MAX;
2404 2649  
2405 2650          if (config && (rewind_error || state != SPA_LOAD_RECOVER))
2406 2651                  spa_config_set(spa, config);
2407 2652  
2408      -        return (state == SPA_LOAD_RECOVER ? rewind_error : load_error);
     2653 +        if (state == SPA_LOAD_RECOVER) {
     2654 +                ASSERT3P(loadinfo, ==, NULL);
     2655 +                return (rewind_error);
     2656 +        } else {
     2657 +                /* Store the rewind info as part of the initial load info */
     2658 +                fnvlist_add_nvlist(loadinfo, ZPOOL_CONFIG_REWIND_INFO,
     2659 +                    spa->spa_load_info);
     2660 +
     2661 +                /* Restore the initial load info */
     2662 +                fnvlist_free(spa->spa_load_info);
     2663 +                spa->spa_load_info = loadinfo;
     2664 +
     2665 +                return (load_error);
     2666 +        }
2409 2667  }
2410 2668  
2411 2669  /*
2412 2670   * Pool Open/Import
2413 2671   *
2414 2672   * The import case is identical to an open except that the configuration is sent
2415 2673   * down from userland, instead of grabbed from the configuration cache.  For the
2416 2674   * case of an open, the pool configuration will exist in the
2417 2675   * POOL_STATE_UNINITIALIZED state.
2418 2676   *
↓ open down ↓ 249 lines elided ↑ open up ↑
2668 2926                          ASSERT(vd != NULL);
2669 2927  
2670 2928                          VERIFY(nvlist_lookup_uint64_array(l2cache[i],
2671 2929                              ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
2672 2930                              == 0);
2673 2931                          vdev_get_stats(vd, vs);
2674 2932                  }
2675 2933          }
2676 2934  }
2677 2935  
     2936 +static void
     2937 +spa_add_feature_stats(spa_t *spa, nvlist_t *config)
     2938 +{
     2939 +        nvlist_t *features;
     2940 +        zap_cursor_t zc;
     2941 +        zap_attribute_t za;
     2942 +
     2943 +        ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));
     2944 +        VERIFY(nvlist_alloc(&features, NV_UNIQUE_NAME, KM_SLEEP) == 0);
     2945 +
     2946 +        if (spa->spa_feat_for_read_obj != 0) {
     2947 +                for (zap_cursor_init(&zc, spa->spa_meta_objset,
     2948 +                    spa->spa_feat_for_read_obj);
     2949 +                    zap_cursor_retrieve(&zc, &za) == 0;
     2950 +                    zap_cursor_advance(&zc)) {
     2951 +                        ASSERT(za.za_integer_length == sizeof (uint64_t) &&
     2952 +                            za.za_num_integers == 1);
     2953 +                        VERIFY3U(0, ==, nvlist_add_uint64(features, za.za_name,
     2954 +                            za.za_first_integer));
     2955 +                }
     2956 +                zap_cursor_fini(&zc);
     2957 +        }
     2958 +
     2959 +        if (spa->spa_feat_for_write_obj != 0) {
     2960 +                for (zap_cursor_init(&zc, spa->spa_meta_objset,
     2961 +                    spa->spa_feat_for_write_obj);
     2962 +                    zap_cursor_retrieve(&zc, &za) == 0;
     2963 +                    zap_cursor_advance(&zc)) {
     2964 +                        ASSERT(za.za_integer_length == sizeof (uint64_t) &&
     2965 +                            za.za_num_integers == 1);
     2966 +                        VERIFY3U(0, ==, nvlist_add_uint64(features, za.za_name,
     2967 +                            za.za_first_integer));
     2968 +                }
     2969 +                zap_cursor_fini(&zc);
     2970 +        }
     2971 +
     2972 +        VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURE_STATS,
     2973 +            features) == 0);
     2974 +        nvlist_free(features);
     2975 +}
     2976 +
2678 2977  int
2679      -spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
     2978 +spa_get_stats(const char *name, nvlist_t **config,
     2979 +    char *altroot, size_t buflen)
2680 2980  {
2681 2981          int error;
2682 2982          spa_t *spa;
2683 2983  
2684 2984          *config = NULL;
2685 2985          error = spa_open_common(name, &spa, FTAG, NULL, config);
2686 2986  
2687 2987          if (spa != NULL) {
2688 2988                  /*
2689 2989                   * This still leaves a window of inconsistency where the spares
↓ open down ↓ 14 lines elided ↑ open up ↑
2704 3004                              ZPOOL_CONFIG_ERRCOUNT,
2705 3005                              spa_get_errlog_size(spa)) == 0);
2706 3006  
2707 3007                          if (spa_suspended(spa))
2708 3008                                  VERIFY(nvlist_add_uint64(*config,
2709 3009                                      ZPOOL_CONFIG_SUSPENDED,
2710 3010                                      spa->spa_failmode) == 0);
2711 3011  
2712 3012                          spa_add_spares(spa, *config);
2713 3013                          spa_add_l2cache(spa, *config);
     3014 +                        spa_add_feature_stats(spa, *config);
2714 3015                  }
2715 3016          }
2716 3017  
2717 3018          /*
2718 3019           * We want to get the alternate root even for faulted pools, so we cheat
2719 3020           * and call spa_lookup() directly.
2720 3021           */
2721 3022          if (altroot) {
2722 3023                  if (spa == NULL) {
2723 3024                          mutex_enter(&spa_namespace_lock);
↓ open down ↓ 200 lines elided ↑ open up ↑
2924 3225          spa_t *spa;
2925 3226          char *altroot = NULL;
2926 3227          vdev_t *rvd;
2927 3228          dsl_pool_t *dp;
2928 3229          dmu_tx_t *tx;
2929 3230          int error = 0;
2930 3231          uint64_t txg = TXG_INITIAL;
2931 3232          nvlist_t **spares, **l2cache;
2932 3233          uint_t nspares, nl2cache;
2933 3234          uint64_t version, obj;
     3235 +        boolean_t has_features;
2934 3236  
2935 3237          /*
2936 3238           * If this pool already exists, return failure.
2937 3239           */
2938 3240          mutex_enter(&spa_namespace_lock);
2939 3241          if (spa_lookup(pool) != NULL) {
2940 3242                  mutex_exit(&spa_namespace_lock);
2941 3243                  return (EEXIST);
2942 3244          }
2943 3245  
↓ open down ↓ 5 lines elided ↑ open up ↑
2949 3251          spa = spa_add(pool, NULL, altroot);
2950 3252          spa_activate(spa, spa_mode_global);
2951 3253  
2952 3254          if (props && (error = spa_prop_validate(spa, props))) {
2953 3255                  spa_deactivate(spa);
2954 3256                  spa_remove(spa);
2955 3257                  mutex_exit(&spa_namespace_lock);
2956 3258                  return (error);
2957 3259          }
2958 3260  
2959      -        if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION),
2960      -            &version) != 0)
     3261 +        has_features = B_FALSE;
     3262 +        for (nvpair_t *elem = nvlist_next_nvpair(props, NULL);
     3263 +            elem != NULL; elem = nvlist_next_nvpair(props, elem)) {
     3264 +                if (zpool_prop_feature(nvpair_name(elem)))
     3265 +                        has_features = B_TRUE;
     3266 +        }
     3267 +
     3268 +        if (has_features || nvlist_lookup_uint64(props,
     3269 +            zpool_prop_to_name(ZPOOL_PROP_VERSION), &version) != 0) {
2961 3270                  version = SPA_VERSION;
2962      -        ASSERT(version <= SPA_VERSION);
     3271 +        }
     3272 +        ASSERT(SPA_VERSION_IS_SUPPORTED(version));
2963 3273  
2964 3274          spa->spa_first_txg = txg;
2965 3275          spa->spa_uberblock.ub_txg = txg - 1;
2966 3276          spa->spa_uberblock.ub_version = version;
2967 3277          spa->spa_ubsync = spa->spa_uberblock;
2968 3278  
2969 3279          /*
2970 3280           * Create "The Godfather" zio to hold all async IOs
2971 3281           */
2972 3282          spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
↓ open down ↓ 55 lines elided ↑ open up ↑
3028 3338                  VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
3029 3339                      NV_UNIQUE_NAME, KM_SLEEP) == 0);
3030 3340                  VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
3031 3341                      ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
3032 3342                  spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
3033 3343                  spa_load_l2cache(spa);
3034 3344                  spa_config_exit(spa, SCL_ALL, FTAG);
3035 3345                  spa->spa_l2cache.sav_sync = B_TRUE;
3036 3346          }
3037 3347  
     3348 +        spa->spa_is_initializing = B_TRUE;
3038 3349          spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg);
3039 3350          spa->spa_meta_objset = dp->dp_meta_objset;
     3351 +        spa->spa_is_initializing = B_FALSE;
3040 3352  
3041 3353          /*
3042 3354           * Create DDTs (dedup tables).
3043 3355           */
3044 3356          ddt_create(spa);
3045 3357  
3046 3358          spa_update_dspace(spa);
3047 3359  
3048 3360          tx = dmu_tx_create_assigned(dp, txg);
3049 3361  
↓ open down ↓ 3 lines elided ↑ open up ↑
3053 3365          spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset,
3054 3366              DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE,
3055 3367              DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx);
3056 3368  
3057 3369          if (zap_add(spa->spa_meta_objset,
3058 3370              DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG,
3059 3371              sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) {
3060 3372                  cmn_err(CE_PANIC, "failed to add pool config");
3061 3373          }
3062 3374  
     3375 +        if (spa_version(spa) >= SPA_VERSION_FEATURES)
     3376 +                spa_feature_create_zap_objects(spa, tx);
     3377 +
3063 3378          if (zap_add(spa->spa_meta_objset,
3064 3379              DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION,
3065 3380              sizeof (uint64_t), 1, &version, tx) != 0) {
3066 3381                  cmn_err(CE_PANIC, "failed to add pool version");
3067 3382          }
3068 3383  
3069 3384          /* Newly created pools with the right version are always deflated. */
3070 3385          if (version >= SPA_VERSION_RAIDZ_DEFLATE) {
3071 3386                  spa->spa_deflate = TRUE;
3072 3387                  if (zap_add(spa->spa_meta_objset,
↓ open down ↓ 170 lines elided ↑ open up ↑
3243 3558  #if defined(_OBP) && defined(_KERNEL)
3244 3559          if (config == NULL) {
3245 3560                  if (strstr(devpath, "/iscsi/ssd") != NULL) {
3246 3561                          /* iscsi boot */
3247 3562                          get_iscsi_bootpath_phy(devpath);
3248 3563                          config = spa_generate_rootconf(devpath, devid, &guid);
3249 3564                  }
3250 3565          }
3251 3566  #endif
3252 3567          if (config == NULL) {
3253      -                cmn_err(CE_NOTE, "Can not read the pool label from '%s'",
     3568 +                cmn_err(CE_NOTE, "Cannot read the pool label from '%s'",
3254 3569                      devpath);
3255 3570                  return (EIO);
3256 3571          }
3257 3572  
3258 3573          VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
3259 3574              &pname) == 0);
3260 3575          VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);
3261 3576  
3262 3577          mutex_enter(&spa_namespace_lock);
3263 3578          if ((spa = spa_lookup(pname)) != NULL) {
↓ open down ↓ 293 lines elided ↑ open up ↑
3557 3872           * If 'tryconfig' was at least parsable, return the current config.
3558 3873           */
3559 3874          if (spa->spa_root_vdev != NULL) {
3560 3875                  config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
3561 3876                  VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
3562 3877                      poolname) == 0);
3563 3878                  VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
3564 3879                      state) == 0);
3565 3880                  VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
3566 3881                      spa->spa_uberblock.ub_timestamp) == 0);
     3882 +                VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
     3883 +                    spa->spa_load_info) == 0);
3567 3884  
3568 3885                  /*
3569 3886                   * If the bootfs property exists on this pool then we
3570 3887                   * copy it out so that external consumers can tell which
3571 3888                   * pools are bootable.
3572 3889                   */
3573 3890                  if ((!error || error == EEXIST) && spa->spa_bootfs) {
3574 3891                          char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3575 3892  
3576 3893                          /*
↓ open down ↓ 1697 lines elided ↑ open up ↑
5274 5591          size_t nvsize = 0;
5275 5592          dmu_buf_t *db;
5276 5593  
5277 5594          VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0);
5278 5595  
5279 5596          /*
5280 5597           * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration
5281 5598           * information.  This avoids the dbuf_will_dirty() path and
5282 5599           * saves us a pre-read to get data we don't actually care about.
5283 5600           */
5284      -        bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE);
     5601 +        bufsize = P2ROUNDUP((uint64_t)nvsize, SPA_CONFIG_BLOCKSIZE);
5285 5602          packed = kmem_alloc(bufsize, KM_SLEEP);
5286 5603  
5287 5604          VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
5288 5605              KM_SLEEP) == 0);
5289 5606          bzero(packed + nvsize, bufsize - nvsize);
5290 5607  
5291 5608          dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx);
5292 5609  
5293 5610          kmem_free(packed, bufsize);
5294 5611  
↓ open down ↓ 64 lines elided ↑ open up ↑
5359 5676  
5360 5677          spa_config_exit(spa, SCL_STATE, FTAG);
5361 5678  
5362 5679          if (spa->spa_config_syncing)
5363 5680                  nvlist_free(spa->spa_config_syncing);
5364 5681          spa->spa_config_syncing = config;
5365 5682  
5366 5683          spa_sync_nvlist(spa, spa->spa_config_object, config, tx);
5367 5684  }
5368 5685  
     5686 +static void
     5687 +spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx)
     5688 +{
     5689 +        spa_t *spa = arg1;
     5690 +        uint64_t version = *(uint64_t *)arg2;
     5691 +
     5692 +        /*
     5693 +         * Setting the version is special cased when first creating the pool.
     5694 +         */
     5695 +        ASSERT(tx->tx_txg != TXG_INITIAL);
     5696 +
     5697 +        ASSERT(version <= SPA_VERSION);
     5698 +        ASSERT(version >= spa_version(spa));
     5699 +
     5700 +        spa->spa_uberblock.ub_version = version;
     5701 +        vdev_config_dirty(spa->spa_root_vdev);
     5702 +}
     5703 +
5369 5704  /*
5370 5705   * Set zpool properties.
5371 5706   */
5372 5707  static void
5373 5708  spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
5374 5709  {
5375 5710          spa_t *spa = arg1;
5376 5711          objset_t *mos = spa->spa_meta_objset;
5377 5712          nvlist_t *nvp = arg2;
5378      -        nvpair_t *elem;
5379      -        uint64_t intval;
5380      -        char *strval;
5381      -        zpool_prop_t prop;
5382      -        const char *propname;
5383      -        zprop_type_t proptype;
     5713 +        nvpair_t *elem = NULL;
5384 5714  
5385 5715          mutex_enter(&spa->spa_props_lock);
5386 5716  
5387      -        elem = NULL;
5388 5717          while ((elem = nvlist_next_nvpair(nvp, elem))) {
     5718 +                uint64_t intval;
     5719 +                char *strval, *fname;
     5720 +                zpool_prop_t prop;
     5721 +                const char *propname;
     5722 +                zprop_type_t proptype;
     5723 +                zfeature_info_t *feature;
     5724 +
5389 5725                  switch (prop = zpool_name_to_prop(nvpair_name(elem))) {
     5726 +                case ZPROP_INVAL:
     5727 +                        /*
     5728 +                         * We checked this earlier in spa_prop_validate().
     5729 +                         */
     5730 +                        ASSERT(zpool_prop_feature(nvpair_name(elem)));
     5731 +
     5732 +                        fname = strchr(nvpair_name(elem), '@') + 1;
     5733 +                        VERIFY3U(0, ==, zfeature_lookup_name(fname, &feature));
     5734 +
     5735 +                        spa_feature_enable(spa, feature, tx);
     5736 +                        break;
     5737 +
5390 5738                  case ZPOOL_PROP_VERSION:
     5739 +                        VERIFY(nvpair_value_uint64(elem, &intval) == 0);
5391 5740                          /*
5392      -                         * Only set version for non-zpool-creation cases
5393      -                         * (set/import). spa_create() needs special care
5394      -                         * for version setting.
     5741 +                         * The version is synced separately before other
     5742 +                         * properties and should be correct by now.
5395 5743                           */
5396      -                        if (tx->tx_txg != TXG_INITIAL) {
5397      -                                VERIFY(nvpair_value_uint64(elem,
5398      -                                    &intval) == 0);
5399      -                                ASSERT(intval <= SPA_VERSION);
5400      -                                ASSERT(intval >= spa_version(spa));
5401      -                                spa->spa_uberblock.ub_version = intval;
5402      -                                vdev_config_dirty(spa->spa_root_vdev);
5403      -                        }
     5744 +                        ASSERT3U(spa_version(spa), >=, intval);
5404 5745                          break;
5405 5746  
5406 5747                  case ZPOOL_PROP_ALTROOT:
5407 5748                          /*
5408 5749                           * 'altroot' is a non-persistent property. It should
5409 5750                           * have been set temporarily at creation or import time.
5410 5751                           */
5411 5752                          ASSERT(spa->spa_root != NULL);
5412 5753                          break;
5413 5754  
↓ open down ↓ 16 lines elided ↑ open up ↑
5430 5771                           * configuration has already been dirtied.
5431 5772                           */
5432 5773                          if (tx->tx_txg != TXG_INITIAL)
5433 5774                                  vdev_config_dirty(spa->spa_root_vdev);
5434 5775                          break;
5435 5776                  default:
5436 5777                          /*
5437 5778                           * Set pool property values in the poolprops mos object.
5438 5779                           */
5439 5780                          if (spa->spa_pool_props_object == 0) {
5440      -                                VERIFY((spa->spa_pool_props_object =
5441      -                                    zap_create(mos, DMU_OT_POOL_PROPS,
5442      -                                    DMU_OT_NONE, 0, tx)) > 0);
5443      -
5444      -                                VERIFY(zap_update(mos,
     5781 +                                spa->spa_pool_props_object =
     5782 +                                    zap_create_link(mos, DMU_OT_POOL_PROPS,
5445 5783                                      DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS,
5446      -                                    8, 1, &spa->spa_pool_props_object, tx)
5447      -                                    == 0);
     5784 +                                    tx);
5448 5785                          }
5449 5786  
5450 5787                          /* normalize the property name */
5451 5788                          propname = zpool_prop_to_name(prop);
5452 5789                          proptype = zpool_prop_get_type(prop);
5453 5790  
5454 5791                          if (nvpair_type(elem) == DATA_TYPE_STRING) {
5455 5792                                  ASSERT(proptype == PROP_TYPE_STRING);
5456 5793                                  VERIFY(nvpair_value_string(elem, &strval) == 0);
5457 5794                                  VERIFY(zap_update(mos,
↓ open down ↓ 78 lines elided ↑ open up ↑
5536 5873                  dsl_pool_upgrade_clones(dp, tx);
5537 5874          }
5538 5875  
5539 5876          if (spa->spa_ubsync.ub_version < SPA_VERSION_DIR_CLONES &&
5540 5877              spa->spa_uberblock.ub_version >= SPA_VERSION_DIR_CLONES) {
5541 5878                  dsl_pool_upgrade_dir_clones(dp, tx);
5542 5879  
5543 5880                  /* Keeping the freedir open increases spa_minref */
5544 5881                  spa->spa_minref += 3;
5545 5882          }
     5883 +
     5884 +        if (spa->spa_ubsync.ub_version < SPA_VERSION_FEATURES &&
     5885 +            spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) {
     5886 +                spa_feature_create_zap_objects(spa, tx);
     5887 +        }
5546 5888  }
5547 5889  
5548 5890  /*
5549 5891   * Sync the specified transaction group.  New blocks may be dirtied as
5550 5892   * part of the process, so we iterate until it converges.
5551 5893   */
5552 5894  void
5553 5895  spa_sync(spa_t *spa, uint64_t txg)
5554 5896  {
5555 5897          dsl_pool_t *dp = spa->spa_dsl_pool;
↓ open down ↓ 427 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX