Print this page
    
1693 persistent 'comment' field for a zpool
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/zfs/spa_config.c
          +++ new/usr/src/uts/common/fs/zfs/spa_config.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  
    | 
      ↓ open down ↓ | 
    13 lines elided | 
    
      ↑ open up ↑ | 
  
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
       24 + * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
       25 + * Copyright (c) 2011 by Delphix. All rights reserved.
  24   26   */
  25   27  
  26   28  #include <sys/spa.h>
  27   29  #include <sys/spa_impl.h>
  28   30  #include <sys/nvpair.h>
  29   31  #include <sys/uio.h>
  30   32  #include <sys/fs/zfs.h>
  31   33  #include <sys/vdev_impl.h>
  32   34  #include <sys/zfs_ioctl.h>
  33   35  #include <sys/utsname.h>
  34   36  #include <sys/systeminfo.h>
  35   37  #include <sys/sunddi.h>
  36   38  #ifdef _KERNEL
  37   39  #include <sys/kobj.h>
  38   40  #include <sys/zone.h>
  39   41  #endif
  40   42  
  41   43  /*
  42   44   * Pool configuration repository.
  43   45   *
  44   46   * Pool configuration is stored as a packed nvlist on the filesystem.  By
  45   47   * default, all pools are stored in /etc/zfs/zpool.cache and loaded on boot
  46   48   * (when the ZFS module is loaded).  Pools can also have the 'cachefile'
  47   49   * property set that allows them to be stored in an alternate location under
  48   50   * the control of external software.
  49   51   *
  50   52   * For each cache file, we have a single nvlist which holds all the
  51   53   * configuration information.  When the module loads, we read this information
  52   54   * from /etc/zfs/zpool.cache and populate the SPA namespace.  This namespace is
  53   55   * maintained independently in spa.c.  Whenever the namespace is modified, or
  54   56   * the configuration of a pool is changed, we call spa_config_sync(), which
  55   57   * walks through all the active pools and writes the configuration to disk.
  56   58   */
  57   59  
  58   60  static uint64_t spa_config_generation = 1;
  59   61  
  60   62  /*
  61   63   * This can be overridden in userland to preserve an alternate namespace for
  62   64   * userland pools when doing testing.
  63   65   */
  64   66  const char *spa_config_path = ZPOOL_CACHE;
  65   67  
  66   68  /*
  67   69   * Called when the module is first loaded, this routine loads the configuration
  68   70   * file into the SPA namespace.  It does not actually open or load the pools; it
  69   71   * only populates the namespace.
  70   72   */
  71   73  void
  72   74  spa_config_load(void)
  73   75  {
  74   76          void *buf = NULL;
  75   77          nvlist_t *nvlist, *child;
  76   78          nvpair_t *nvpair;
  77   79          char *pathname;
  78   80          struct _buf *file;
  79   81          uint64_t fsize;
  80   82  
  81   83          /*
  82   84           * Open the configuration file.
  83   85           */
  84   86          pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
  85   87  
  86   88          (void) snprintf(pathname, MAXPATHLEN, "%s%s",
  87   89              (rootdir != NULL) ? "./" : "", spa_config_path);
  88   90  
  89   91          file = kobj_open_file(pathname);
  90   92  
  91   93          kmem_free(pathname, MAXPATHLEN);
  92   94  
  93   95          if (file == (struct _buf *)-1)
  94   96                  return;
  95   97  
  96   98          if (kobj_get_filesize(file, &fsize) != 0)
  97   99                  goto out;
  98  100  
  99  101          buf = kmem_alloc(fsize, KM_SLEEP);
 100  102  
 101  103          /*
 102  104           * Read the nvlist from the file.
 103  105           */
 104  106          if (kobj_read_file(file, buf, fsize, 0) < 0)
 105  107                  goto out;
 106  108  
 107  109          /*
 108  110           * Unpack the nvlist.
 109  111           */
 110  112          if (nvlist_unpack(buf, fsize, &nvlist, KM_SLEEP) != 0)
 111  113                  goto out;
 112  114  
 113  115          /*
 114  116           * Iterate over all elements in the nvlist, creating a new spa_t for
 115  117           * each one with the specified configuration.
 116  118           */
 117  119          mutex_enter(&spa_namespace_lock);
 118  120          nvpair = NULL;
 119  121          while ((nvpair = nvlist_next_nvpair(nvlist, nvpair)) != NULL) {
 120  122                  if (nvpair_type(nvpair) != DATA_TYPE_NVLIST)
 121  123                          continue;
 122  124  
 123  125                  VERIFY(nvpair_value_nvlist(nvpair, &child) == 0);
 124  126  
 125  127                  if (spa_lookup(nvpair_name(nvpair)) != NULL)
 126  128                          continue;
 127  129                  (void) spa_add(nvpair_name(nvpair), child, NULL);
 128  130          }
 129  131          mutex_exit(&spa_namespace_lock);
 130  132  
 131  133          nvlist_free(nvlist);
 132  134  
 133  135  out:
 134  136          if (buf != NULL)
 135  137                  kmem_free(buf, fsize);
 136  138  
 137  139          kobj_close_file(file);
 138  140  }
 139  141  
 140  142  static void
 141  143  spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
 142  144  {
 143  145          size_t buflen;
 144  146          char *buf;
 145  147          vnode_t *vp;
 146  148          int oflags = FWRITE | FTRUNC | FCREAT | FOFFMAX;
 147  149          char *temp;
 148  150  
 149  151          /*
 150  152           * If the nvlist is empty (NULL), then remove the old cachefile.
 151  153           */
 152  154          if (nvl == NULL) {
 153  155                  (void) vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE);
 154  156                  return;
 155  157          }
 156  158  
 157  159          /*
 158  160           * Pack the configuration into a buffer.
 159  161           */
 160  162          VERIFY(nvlist_size(nvl, &buflen, NV_ENCODE_XDR) == 0);
 161  163  
 162  164          buf = kmem_alloc(buflen, KM_SLEEP);
 163  165          temp = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
 164  166  
 165  167          VERIFY(nvlist_pack(nvl, &buf, &buflen, NV_ENCODE_XDR,
 166  168              KM_SLEEP) == 0);
 167  169  
 168  170          /*
 169  171           * Write the configuration to disk.  We need to do the traditional
 170  172           * 'write to temporary file, sync, move over original' to make sure we
 171  173           * always have a consistent view of the data.
 172  174           */
 173  175          (void) snprintf(temp, MAXPATHLEN, "%s.tmp", dp->scd_path);
 174  176  
 175  177          if (vn_open(temp, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0) == 0) {
 176  178                  if (vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE,
 177  179                      0, RLIM64_INFINITY, kcred, NULL) == 0 &&
 178  180                      VOP_FSYNC(vp, FSYNC, kcred, NULL) == 0) {
 179  181                          (void) vn_rename(temp, dp->scd_path, UIO_SYSSPACE);
 180  182                  }
 181  183                  (void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL);
 182  184                  VN_RELE(vp);
 183  185          }
 184  186  
 185  187          (void) vn_remove(temp, UIO_SYSSPACE, RMFILE);
 186  188  
 187  189          kmem_free(buf, buflen);
 188  190          kmem_free(temp, MAXPATHLEN);
 189  191  }
 190  192  
 191  193  /*
 192  194   * Synchronize pool configuration to disk.  This must be called with the
 193  195   * namespace lock held.
 194  196   */
 195  197  void
 196  198  spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
 197  199  {
 198  200          spa_config_dirent_t *dp, *tdp;
 199  201          nvlist_t *nvl;
 200  202  
 201  203          ASSERT(MUTEX_HELD(&spa_namespace_lock));
 202  204  
 203  205          if (rootdir == NULL || !(spa_mode_global & FWRITE))
 204  206                  return;
 205  207  
 206  208          /*
 207  209           * Iterate over all cachefiles for the pool, past or present.  When the
 208  210           * cachefile is changed, the new one is pushed onto this list, allowing
 209  211           * us to update previous cachefiles that no longer contain this pool.
 210  212           */
 211  213          for (dp = list_head(&target->spa_config_list); dp != NULL;
 212  214              dp = list_next(&target->spa_config_list, dp)) {
 213  215                  spa_t *spa = NULL;
 214  216                  if (dp->scd_path == NULL)
 215  217                          continue;
 216  218  
 217  219                  /*
 218  220                   * Iterate over all pools, adding any matching pools to 'nvl'.
 219  221                   */
 220  222                  nvl = NULL;
 221  223                  while ((spa = spa_next(spa)) != NULL) {
 222  224                          if (spa == target && removing)
 223  225                                  continue;
 224  226  
 225  227                          mutex_enter(&spa->spa_props_lock);
 226  228                          tdp = list_head(&spa->spa_config_list);
 227  229                          if (spa->spa_config == NULL ||
 228  230                              tdp->scd_path == NULL ||
 229  231                              strcmp(tdp->scd_path, dp->scd_path) != 0) {
 230  232                                  mutex_exit(&spa->spa_props_lock);
 231  233                                  continue;
 232  234                          }
 233  235  
 234  236                          if (nvl == NULL)
 235  237                                  VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME,
 236  238                                      KM_SLEEP) == 0);
 237  239  
 238  240                          VERIFY(nvlist_add_nvlist(nvl, spa->spa_name,
 239  241                              spa->spa_config) == 0);
 240  242                          mutex_exit(&spa->spa_props_lock);
 241  243                  }
 242  244  
 243  245                  spa_config_write(dp, nvl);
 244  246                  nvlist_free(nvl);
 245  247          }
 246  248  
 247  249          /*
 248  250           * Remove any config entries older than the current one.
 249  251           */
 250  252          dp = list_head(&target->spa_config_list);
 251  253          while ((tdp = list_next(&target->spa_config_list, dp)) != NULL) {
 252  254                  list_remove(&target->spa_config_list, tdp);
 253  255                  if (tdp->scd_path != NULL)
 254  256                          spa_strfree(tdp->scd_path);
 255  257                  kmem_free(tdp, sizeof (spa_config_dirent_t));
 256  258          }
 257  259  
 258  260          spa_config_generation++;
 259  261  
 260  262          if (postsysevent)
 261  263                  spa_event_notify(target, NULL, ESC_ZFS_CONFIG_SYNC);
 262  264  }
 263  265  
 264  266  /*
 265  267   * Sigh.  Inside a local zone, we don't have access to /etc/zfs/zpool.cache,
 266  268   * and we don't want to allow the local zone to see all the pools anyway.
 267  269   * So we have to invent the ZFS_IOC_CONFIG ioctl to grab the configuration
 268  270   * information for all pools visible within the zone.
 269  271   */
 270  272  nvlist_t *
 271  273  spa_all_configs(uint64_t *generation)
 272  274  {
 273  275          nvlist_t *pools;
 274  276          spa_t *spa = NULL;
 275  277  
 276  278          if (*generation == spa_config_generation)
 277  279                  return (NULL);
 278  280  
 279  281          VERIFY(nvlist_alloc(&pools, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 280  282  
 281  283          mutex_enter(&spa_namespace_lock);
 282  284          while ((spa = spa_next(spa)) != NULL) {
 283  285                  if (INGLOBALZONE(curproc) ||
 284  286                      zone_dataset_visible(spa_name(spa), NULL)) {
 285  287                          mutex_enter(&spa->spa_props_lock);
 286  288                          VERIFY(nvlist_add_nvlist(pools, spa_name(spa),
 287  289                              spa->spa_config) == 0);
 288  290                          mutex_exit(&spa->spa_props_lock);
 289  291                  }
 290  292          }
 291  293          *generation = spa_config_generation;
 292  294          mutex_exit(&spa_namespace_lock);
 293  295  
 294  296          return (pools);
 295  297  }
 296  298  
 297  299  void
 298  300  spa_config_set(spa_t *spa, nvlist_t *config)
 299  301  {
 300  302          mutex_enter(&spa->spa_props_lock);
 301  303          if (spa->spa_config != NULL)
 302  304                  nvlist_free(spa->spa_config);
 303  305          spa->spa_config = config;
 304  306          mutex_exit(&spa->spa_props_lock);
 305  307  }
 306  308  
 307  309  /*
 308  310   * Generate the pool's configuration based on the current in-core state.
 309  311   * We infer whether to generate a complete config or just one top-level config
 310  312   * based on whether vd is the root vdev.
 311  313   */
 312  314  nvlist_t *
 313  315  spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
 314  316  {
 315  317          nvlist_t *config, *nvroot;
 316  318          vdev_t *rvd = spa->spa_root_vdev;
 317  319          unsigned long hostid = 0;
 318  320          boolean_t locked = B_FALSE;
 319  321          uint64_t split_guid;
 320  322  
 321  323          if (vd == NULL) {
 322  324                  vd = rvd;
 323  325                  locked = B_TRUE;
 324  326                  spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
 325  327          }
 326  328  
 327  329          ASSERT(spa_config_held(spa, SCL_CONFIG | SCL_STATE, RW_READER) ==
 328  330              (SCL_CONFIG | SCL_STATE));
 329  331  
 330  332          /*
 331  333           * If txg is -1, report the current value of spa->spa_config_txg.
 332  334           */
 333  335          if (txg == -1ULL)
 334  336                  txg = spa->spa_config_txg;
 335  337  
 336  338          VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 337  339  
  
    | 
      ↓ open down ↓ | 
    304 lines elided | 
    
      ↑ open up ↑ | 
  
 338  340          VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
 339  341              spa_version(spa)) == 0);
 340  342          VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
 341  343              spa_name(spa)) == 0);
 342  344          VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
 343  345              spa_state(spa)) == 0);
 344  346          VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG,
 345  347              txg) == 0);
 346  348          VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
 347  349              spa_guid(spa)) == 0);
      350 +        VERIFY(spa->spa_comment == NULL || nvlist_add_string(config,
      351 +            ZPOOL_CONFIG_COMMENT, spa->spa_comment) == 0);
      352 +
      353 +
 348  354  #ifdef  _KERNEL
 349  355          hostid = zone_get_hostid(NULL);
 350  356  #else   /* _KERNEL */
 351  357          /*
 352  358           * We're emulating the system's hostid in userland, so we can't use
 353  359           * zone_get_hostid().
 354  360           */
 355  361          (void) ddi_strtoul(hw_serial, NULL, 10, &hostid);
 356  362  #endif  /* _KERNEL */
 357  363          if (hostid != 0) {
 358  364                  VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
 359  365                      hostid) == 0);
 360  366          }
 361  367          VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
 362  368              utsname.nodename) == 0);
 363  369  
 364  370          if (vd != rvd) {
 365  371                  VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID,
 366  372                      vd->vdev_top->vdev_guid) == 0);
 367  373                  VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID,
 368  374                      vd->vdev_guid) == 0);
 369  375                  if (vd->vdev_isspare)
 370  376                          VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE,
 371  377                              1ULL) == 0);
 372  378                  if (vd->vdev_islog)
 373  379                          VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG,
 374  380                              1ULL) == 0);
 375  381                  vd = vd->vdev_top;              /* label contains top config */
 376  382          } else {
 377  383                  /*
 378  384                   * Only add the (potentially large) split information
 379  385                   * in the mos config, and not in the vdev labels
 380  386                   */
 381  387                  if (spa->spa_config_splitting != NULL)
 382  388                          VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_SPLIT,
 383  389                              spa->spa_config_splitting) == 0);
 384  390          }
 385  391  
 386  392          /*
 387  393           * Add the top-level config.  We even add this on pools which
 388  394           * don't support holes in the namespace.
 389  395           */
 390  396          vdev_top_config_generate(spa, config);
 391  397  
 392  398          /*
 393  399           * If we're splitting, record the original pool's guid.
 394  400           */
 395  401          if (spa->spa_config_splitting != NULL &&
 396  402              nvlist_lookup_uint64(spa->spa_config_splitting,
 397  403              ZPOOL_CONFIG_SPLIT_GUID, &split_guid) == 0) {
 398  404                  VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_SPLIT_GUID,
 399  405                      split_guid) == 0);
 400  406          }
 401  407  
 402  408          nvroot = vdev_config_generate(spa, vd, getstats, 0);
 403  409          VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
 404  410          nvlist_free(nvroot);
 405  411  
 406  412          if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) {
 407  413                  ddt_histogram_t *ddh;
 408  414                  ddt_stat_t *dds;
 409  415                  ddt_object_t *ddo;
 410  416  
 411  417                  ddh = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP);
 412  418                  ddt_get_dedup_histogram(spa, ddh);
 413  419                  VERIFY(nvlist_add_uint64_array(config,
 414  420                      ZPOOL_CONFIG_DDT_HISTOGRAM,
 415  421                      (uint64_t *)ddh, sizeof (*ddh) / sizeof (uint64_t)) == 0);
 416  422                  kmem_free(ddh, sizeof (ddt_histogram_t));
 417  423  
 418  424                  ddo = kmem_zalloc(sizeof (ddt_object_t), KM_SLEEP);
 419  425                  ddt_get_dedup_object_stats(spa, ddo);
 420  426                  VERIFY(nvlist_add_uint64_array(config,
 421  427                      ZPOOL_CONFIG_DDT_OBJ_STATS,
 422  428                      (uint64_t *)ddo, sizeof (*ddo) / sizeof (uint64_t)) == 0);
 423  429                  kmem_free(ddo, sizeof (ddt_object_t));
 424  430  
 425  431                  dds = kmem_zalloc(sizeof (ddt_stat_t), KM_SLEEP);
 426  432                  ddt_get_dedup_stats(spa, dds);
 427  433                  VERIFY(nvlist_add_uint64_array(config,
 428  434                      ZPOOL_CONFIG_DDT_STATS,
 429  435                      (uint64_t *)dds, sizeof (*dds) / sizeof (uint64_t)) == 0);
 430  436                  kmem_free(dds, sizeof (ddt_stat_t));
 431  437          }
 432  438  
 433  439          if (locked)
 434  440                  spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
 435  441  
 436  442          return (config);
 437  443  }
 438  444  
 439  445  /*
 440  446   * Update all disk labels, generate a fresh config based on the current
 441  447   * in-core state, and sync the global config cache (do not sync the config
 442  448   * cache if this is a booting rootpool).
 443  449   */
 444  450  void
 445  451  spa_config_update(spa_t *spa, int what)
 446  452  {
 447  453          vdev_t *rvd = spa->spa_root_vdev;
 448  454          uint64_t txg;
 449  455          int c;
 450  456  
 451  457          ASSERT(MUTEX_HELD(&spa_namespace_lock));
 452  458  
 453  459          spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
 454  460          txg = spa_last_synced_txg(spa) + 1;
 455  461          if (what == SPA_CONFIG_UPDATE_POOL) {
 456  462                  vdev_config_dirty(rvd);
 457  463          } else {
 458  464                  /*
 459  465                   * If we have top-level vdevs that were added but have
 460  466                   * not yet been prepared for allocation, do that now.
 461  467                   * (It's safe now because the config cache is up to date,
 462  468                   * so it will be able to translate the new DVAs.)
 463  469                   * See comments in spa_vdev_add() for full details.
 464  470                   */
 465  471                  for (c = 0; c < rvd->vdev_children; c++) {
 466  472                          vdev_t *tvd = rvd->vdev_child[c];
 467  473                          if (tvd->vdev_ms_array == 0)
 468  474                                  vdev_metaslab_set_size(tvd);
 469  475                          vdev_expand(tvd, txg);
 470  476                  }
 471  477          }
 472  478          spa_config_exit(spa, SCL_ALL, FTAG);
 473  479  
 474  480          /*
 475  481           * Wait for the mosconfig to be regenerated and synced.
 476  482           */
 477  483          txg_wait_synced(spa->spa_dsl_pool, txg);
 478  484  
 479  485          /*
 480  486           * Update the global config cache to reflect the new mosconfig.
 481  487           */
 482  488          if (!spa->spa_is_root)
 483  489                  spa_config_sync(spa, B_FALSE, what != SPA_CONFIG_UPDATE_POOL);
 484  490  
 485  491          if (what == SPA_CONFIG_UPDATE_POOL)
 486  492                  spa_config_update(spa, SPA_CONFIG_UPDATE_VDEVS);
 487  493  }
  
    | 
      ↓ open down ↓ | 
    130 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX