Print this page
    
NEX-4934 Add capability to remove special vdev
Reviewed by: Alex Aizman <alex.aizman@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/cmd/zinject/translate.c
          +++ new/usr/src/cmd/zinject/translate.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  
    | 
      ↓ open down ↓ | 
    13 lines elided | 
    
      ↑ open up ↑ | 
  
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright (c) 2012 by Delphix. All rights reserved.
       24 + * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  24   25   */
  25   26  
  26   27  #include <libzfs.h>
  27   28  
  28   29  #include <sys/zfs_context.h>
  29   30  
  30   31  #include <errno.h>
  31   32  #include <fcntl.h>
  32   33  #include <stdarg.h>
  33   34  #include <stddef.h>
  34   35  #include <stdio.h>
  35   36  #include <stdlib.h>
  36   37  #include <strings.h>
  37   38  #include <sys/file.h>
  38   39  #include <sys/mntent.h>
  39   40  #include <sys/mnttab.h>
  40   41  #include <sys/param.h>
  41   42  #include <sys/stat.h>
  42   43  
  43   44  #include <sys/dmu.h>
  44   45  #include <sys/dmu_objset.h>
  45   46  #include <sys/dnode.h>
  46   47  #include <sys/vdev_impl.h>
  47   48  
  48   49  #include <sys/mkdev.h>
  49   50  
  50   51  #include "zinject.h"
  51   52  
  52   53  extern void kernel_init(int);
  53   54  extern void kernel_fini(void);
  54   55  
  /* Non-zero enables the ziprintf() trace output below. */
  static int debug;

  /*
   * printf-style trace helper: formats its arguments to stdout when 'debug'
   * is non-zero, and does nothing at all otherwise.
   */
  static void
  ziprintf(const char *fmt, ...)
  {
          if (debug) {
                  va_list args;

                  va_start(args, fmt);
                  (void) vprintf(fmt, args);
                  va_end(args);
          }
  }
  69   70  
  /*
   * Copy 'src' to 'dest', collapsing every run of consecutive '/' characters
   * into a single '/'.  'dest' is always NUL-terminated; the caller must
   * supply a buffer large enough to hold the result.
   */
  static void
  compress_slashes(const char *src, char *dest)
  {
          char prev = '\0';

          for (; *src != '\0'; src++) {
                  /* Drop a slash that directly follows another slash. */
                  if (*src == '/' && prev == '/')
                          continue;

                  prev = *src;
                  *dest++ = prev;
          }

          *dest = '\0';
  }
  81   82  
  82   83  /*
  83   84   * Given a full path to a file, translate into a dataset name and a relative
  84   85   * path within the dataset.  'dataset' must be at least MAXNAMELEN characters,
  85   86   * and 'relpath' must be at least MAXPATHLEN characters.  We also pass a stat64
  86   87   * buffer, which we need later to get the object ID.
  87   88   */
  88   89  static int
  89   90  parse_pathname(const char *inpath, char *dataset, char *relpath,
  90   91      struct stat64 *statbuf)
  91   92  {
  92   93          struct extmnttab mp;
  93   94          FILE *fp;
  94   95          int match;
  95   96          const char *rel;
  96   97          char fullpath[MAXPATHLEN];
  97   98  
  98   99          compress_slashes(inpath, fullpath);
  99  100  
 100  101          if (fullpath[0] != '/') {
 101  102                  (void) fprintf(stderr, "invalid object '%s': must be full "
 102  103                      "path\n", fullpath);
 103  104                  usage();
 104  105                  return (-1);
 105  106          }
 106  107  
 107  108          if (strlen(fullpath) >= MAXPATHLEN) {
 108  109                  (void) fprintf(stderr, "invalid object; pathname too long\n");
 109  110                  return (-1);
 110  111          }
 111  112  
 112  113          if (stat64(fullpath, statbuf) != 0) {
 113  114                  (void) fprintf(stderr, "cannot open '%s': %s\n",
 114  115                      fullpath, strerror(errno));
 115  116                  return (-1);
 116  117          }
 117  118  
 118  119          if ((fp = fopen(MNTTAB, "r")) == NULL) {
 119  120                  (void) fprintf(stderr, "cannot open /etc/mnttab\n");
 120  121                  return (-1);
 121  122          }
 122  123  
 123  124          match = 0;
 124  125          while (getextmntent(fp, &mp, sizeof (mp)) == 0) {
 125  126                  if (makedev(mp.mnt_major, mp.mnt_minor) == statbuf->st_dev) {
 126  127                          match = 1;
 127  128                          break;
 128  129                  }
 129  130          }
 130  131  
 131  132          if (!match) {
 132  133                  (void) fprintf(stderr, "cannot find mountpoint for '%s'\n",
 133  134                      fullpath);
 134  135                  return (-1);
 135  136          }
 136  137  
 137  138          if (strcmp(mp.mnt_fstype, MNTTYPE_ZFS) != 0) {
 138  139                  (void) fprintf(stderr, "invalid path '%s': not a ZFS "
 139  140                      "filesystem\n", fullpath);
 140  141                  return (-1);
 141  142          }
 142  143  
 143  144          if (strncmp(fullpath, mp.mnt_mountp, strlen(mp.mnt_mountp)) != 0) {
 144  145                  (void) fprintf(stderr, "invalid path '%s': mountpoint "
 145  146                      "doesn't match path\n", fullpath);
 146  147                  return (-1);
 147  148          }
 148  149  
 149  150          (void) strcpy(dataset, mp.mnt_special);
 150  151  
 151  152          rel = fullpath + strlen(mp.mnt_mountp);
 152  153          if (rel[0] == '/')
 153  154                  rel++;
 154  155          (void) strcpy(relpath, rel);
 155  156  
 156  157          return (0);
 157  158  }
 158  159  
 159  160  /*
 160  161   * Convert from a (dataset, path) pair into a (objset, object) pair.  Note that
 161  162   * we grab the object number from the inode number, since looking this up via
 162  163   * libzpool is a real pain.
 163  164   */
 164  165  /* ARGSUSED */
 165  166  static int
 166  167  object_from_path(const char *dataset, const char *path, struct stat64 *statbuf,
 167  168      zinject_record_t *record)
 168  169  {
 169  170          objset_t *os;
 170  171          int err;
 171  172  
 172  173          /*
 173  174           * Before doing any libzpool operations, call sync() to ensure that the
 174  175           * on-disk state is consistent with the in-core state.
 175  176           */
 176  177          sync();
 177  178  
 178  179          err = dmu_objset_own(dataset, DMU_OST_ZFS, B_TRUE, FTAG, &os);
 179  180          if (err != 0) {
 180  181                  (void) fprintf(stderr, "cannot open dataset '%s': %s\n",
 181  182                      dataset, strerror(err));
 182  183                  return (-1);
 183  184          }
 184  185  
 185  186          record->zi_objset = dmu_objset_id(os);
 186  187          record->zi_object = statbuf->st_ino;
 187  188  
 188  189          dmu_objset_disown(os, FTAG);
 189  190  
 190  191          return (0);
 191  192  }
 192  193  
 193  194  /*
 194  195   * Calculate the real range based on the type, level, and range given.
 195  196   */
 196  197  static int
 197  198  calculate_range(const char *dataset, err_type_t type, int level, char *range,
 198  199      zinject_record_t *record)
 199  200  {
 200  201          objset_t *os = NULL;
 201  202          dnode_t *dn = NULL;
 202  203          int err;
 203  204          int ret = -1;
 204  205  
 205  206          /*
 206  207           * Determine the numeric range from the string.
 207  208           */
 208  209          if (range == NULL) {
 209  210                  /*
 210  211                   * If range is unspecified, set the range to [0,-1], which
 211  212                   * indicates that the whole object should be treated as an
 212  213                   * error.
 213  214                   */
 214  215                  record->zi_start = 0;
 215  216                  record->zi_end = -1ULL;
 216  217          } else {
 217  218                  char *end;
 218  219  
 219  220                  /* XXX add support for suffixes */
 220  221                  record->zi_start = strtoull(range, &end, 10);
 221  222  
 222  223  
 223  224                  if (*end == '\0')
 224  225                          record->zi_end = record->zi_start + 1;
 225  226                  else if (*end == ',')
 226  227                          record->zi_end = strtoull(end + 1, &end, 10);
 227  228  
 228  229                  if (*end != '\0') {
 229  230                          (void) fprintf(stderr, "invalid range '%s': must be "
 230  231                              "a numeric range of the form 'start[,end]'\n",
 231  232                              range);
 232  233                          goto out;
 233  234                  }
 234  235          }
 235  236  
 236  237          switch (type) {
 237  238          case TYPE_DATA:
 238  239                  break;
 239  240  
 240  241          case TYPE_DNODE:
 241  242                  /*
 242  243                   * If this is a request to inject faults into the dnode, then we
 243  244                   * must translate the current (objset,object) pair into an
 244  245                   * offset within the metadnode for the objset.  Specifying any
 245  246                   * kind of range with type 'dnode' is illegal.
 246  247                   */
 247  248                  if (range != NULL) {
 248  249                          (void) fprintf(stderr, "range cannot be specified when "
 249  250                              "type is 'dnode'\n");
 250  251                          goto out;
 251  252                  }
 252  253  
 253  254                  record->zi_start = record->zi_object * sizeof (dnode_phys_t);
 254  255                  record->zi_end = record->zi_start + sizeof (dnode_phys_t);
 255  256                  record->zi_object = 0;
 256  257                  break;
 257  258          }
 258  259  
 259  260          /*
 260  261           * Get the dnode associated with object, so we can calculate the block
 261  262           * size.
 262  263           */
 263  264          if ((err = dmu_objset_own(dataset, DMU_OST_ANY,
 264  265              B_TRUE, FTAG, &os)) != 0) {
 265  266                  (void) fprintf(stderr, "cannot open dataset '%s': %s\n",
 266  267                      dataset, strerror(err));
 267  268                  goto out;
 268  269          }
 269  270  
 270  271          if (record->zi_object == 0) {
 271  272                  dn = DMU_META_DNODE(os);
 272  273          } else {
 273  274                  err = dnode_hold(os, record->zi_object, FTAG, &dn);
 274  275                  if (err != 0) {
 275  276                          (void) fprintf(stderr, "failed to hold dnode "
 276  277                              "for object %llu\n",
 277  278                              (u_longlong_t)record->zi_object);
 278  279                          goto out;
 279  280                  }
 280  281          }
 281  282  
 282  283  
 283  284          ziprintf("data shift: %d\n", (int)dn->dn_datablkshift);
 284  285          ziprintf(" ind shift: %d\n", (int)dn->dn_indblkshift);
 285  286  
 286  287          /*
 287  288           * Translate range into block IDs.
 288  289           */
 289  290          if (record->zi_start != 0 || record->zi_end != -1ULL) {
 290  291                  record->zi_start >>= dn->dn_datablkshift;
 291  292                  record->zi_end >>= dn->dn_datablkshift;
 292  293          }
 293  294  
 294  295          /*
 295  296           * Check level, and then translate level 0 blkids into ranges
 296  297           * appropriate for level of indirection.
 297  298           */
 298  299          record->zi_level = level;
 299  300          if (level > 0) {
 300  301                  ziprintf("level 0 blkid range: [%llu, %llu]\n",
 301  302                      record->zi_start, record->zi_end);
 302  303  
 303  304                  if (level >= dn->dn_nlevels) {
 304  305                          (void) fprintf(stderr, "level %d exceeds max level "
 305  306                              "of object (%d)\n", level, dn->dn_nlevels - 1);
 306  307                          goto out;
 307  308                  }
 308  309  
 309  310                  if (record->zi_start != 0 || record->zi_end != 0) {
 310  311                          int shift = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
 311  312  
 312  313                          for (; level > 0; level--) {
 313  314                                  record->zi_start >>= shift;
 314  315                                  record->zi_end >>= shift;
 315  316                          }
 316  317                  }
 317  318          }
 318  319  
 319  320          ret = 0;
 320  321  out:
 321  322          if (dn) {
 322  323                  if (dn != DMU_META_DNODE(os))
 323  324                          dnode_rele(dn, FTAG);
 324  325          }
 325  326          if (os)
 326  327                  dmu_objset_disown(os, FTAG);
 327  328  
 328  329          return (ret);
 329  330  }
 330  331  
 331  332  int
 332  333  translate_record(err_type_t type, const char *object, const char *range,
 333  334      int level, zinject_record_t *record, char *poolname, char *dataset)
 334  335  {
 335  336          char path[MAXPATHLEN];
 336  337          char *slash;
 337  338          struct stat64 statbuf;
 338  339          int ret = -1;
 339  340  
 340  341          kernel_init(FREAD);
 341  342  
 342  343          debug = (getenv("ZINJECT_DEBUG") != NULL);
 343  344  
 344  345          ziprintf("translating: %s\n", object);
 345  346  
 346  347          if (MOS_TYPE(type)) {
 347  348                  /*
 348  349                   * MOS objects are treated specially.
 349  350                   */
 350  351                  switch (type) {
 351  352                  case TYPE_MOS:
 352  353                          record->zi_type = 0;
 353  354                          break;
 354  355                  case TYPE_MOSDIR:
 355  356                          record->zi_type = DMU_OT_OBJECT_DIRECTORY;
 356  357                          break;
 357  358                  case TYPE_METASLAB:
 358  359                          record->zi_type = DMU_OT_OBJECT_ARRAY;
 359  360                          break;
 360  361                  case TYPE_CONFIG:
 361  362                          record->zi_type = DMU_OT_PACKED_NVLIST;
 362  363                          break;
 363  364                  case TYPE_BPOBJ:
 364  365                          record->zi_type = DMU_OT_BPOBJ;
 365  366                          break;
 366  367                  case TYPE_SPACEMAP:
 367  368                          record->zi_type = DMU_OT_SPACE_MAP;
 368  369                          break;
 369  370                  case TYPE_ERRLOG:
 370  371                          record->zi_type = DMU_OT_ERROR_LOG;
 371  372                          break;
 372  373                  }
 373  374  
 374  375                  dataset[0] = '\0';
 375  376                  (void) strcpy(poolname, object);
 376  377                  return (0);
 377  378          }
 378  379  
 379  380          /*
 380  381           * Convert a full path into a (dataset, file) pair.
 381  382           */
 382  383          if (parse_pathname(object, dataset, path, &statbuf) != 0)
 383  384                  goto err;
 384  385  
 385  386          ziprintf("   dataset: %s\n", dataset);
 386  387          ziprintf("      path: %s\n", path);
 387  388  
 388  389          /*
 389  390           * Convert (dataset, file) into (objset, object)
 390  391           */
 391  392          if (object_from_path(dataset, path, &statbuf, record) != 0)
 392  393                  goto err;
 393  394  
 394  395          ziprintf("raw objset: %llu\n", record->zi_objset);
 395  396          ziprintf("raw object: %llu\n", record->zi_object);
 396  397  
 397  398          /*
 398  399           * For the given object, calculate the real (type, level, range)
 399  400           */
 400  401          if (calculate_range(dataset, type, level, (char *)range, record) != 0)
 401  402                  goto err;
 402  403  
 403  404          ziprintf("    objset: %llu\n", record->zi_objset);
 404  405          ziprintf("    object: %llu\n", record->zi_object);
 405  406          if (record->zi_start == 0 &&
 406  407              record->zi_end == -1ULL)
 407  408                  ziprintf("     range: all\n");
 408  409          else
 409  410                  ziprintf("     range: [%llu, %llu]\n", record->zi_start,
 410  411                      record->zi_end);
 411  412  
 412  413          /*
 413  414           * Copy the pool name
 414  415           */
 415  416          (void) strcpy(poolname, dataset);
 416  417          if ((slash = strchr(poolname, '/')) != NULL)
 417  418                  *slash = '\0';
 418  419  
 419  420          ret = 0;
 420  421  
 421  422  err:
 422  423          kernel_fini();
 423  424          return (ret);
 424  425  }
 425  426  
 426  427  int
 427  428  translate_raw(const char *str, zinject_record_t *record)
 428  429  {
 429  430          /*
 430  431           * A raw bookmark of the form objset:object:level:blkid, where each
 431  432           * number is a hexidecimal value.
 432  433           */
 433  434          if (sscanf(str, "%llx:%llx:%x:%llx", (u_longlong_t *)&record->zi_objset,
 434  435              (u_longlong_t *)&record->zi_object, &record->zi_level,
 435  436              (u_longlong_t *)&record->zi_start) != 4) {
 436  437                  (void) fprintf(stderr, "bad raw spec '%s': must be of the form "
 437  438                      "'objset:object:level:blkid'\n", str);
 438  439                  return (-1);
 439  440          }
 440  441  
 441  442          record->zi_end = record->zi_start;
 442  443  
 443  444          return (0);
 444  445  }
 445  446  
 446  447  int
 447  448  translate_device(const char *pool, const char *device, err_type_t label_type,
 448  449      zinject_record_t *record)
 449  450  {
 450  451          char *end;
 451  452          zpool_handle_t *zhp;
 452  453          nvlist_t *tgt;
 453  454          boolean_t isspare, iscache;
  
    | 
      ↓ open down ↓ | 
    420 lines elided | 
    
      ↑ open up ↑ | 
  
 454  455  
 455  456          /*
 456  457           * Given a device name or GUID, create an appropriate injection record
 457  458           * with zi_guid set.
 458  459           */
 459  460          if ((zhp = zpool_open(g_zfs, pool)) == NULL)
 460  461                  return (-1);
 461  462  
 462  463          record->zi_guid = strtoull(device, &end, 16);
 463  464          if (record->zi_guid == 0 || *end != '\0') {
 464      -                tgt = zpool_find_vdev(zhp, device, &isspare, &iscache, NULL);
      465 +                tgt = zpool_find_vdev(zhp, device, &isspare, &iscache, NULL,
      466 +                    NULL);
 465  467  
 466  468                  if (tgt == NULL) {
 467  469                          (void) fprintf(stderr, "cannot find device '%s' in "
 468  470                              "pool '%s'\n", device, pool);
 469  471                          return (-1);
 470  472                  }
 471  473  
 472  474                  verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
 473  475                      &record->zi_guid) == 0);
 474  476          }
 475  477  
 476  478          /*
 477  479           * Device faults can take on three different forms:
 478  480           * 1). delayed or hanging I/O
 479  481           * 2). zfs label faults
 480  482           * 3). generic disk faults
 481  483           */
 482  484          if (record->zi_timer != 0) {
 483  485                  record->zi_cmd = ZINJECT_DELAY_IO;
 484  486          } else if (label_type != TYPE_INVAL) {
 485  487                  record->zi_cmd = ZINJECT_LABEL_FAULT;
 486  488          } else {
 487  489                  record->zi_cmd = ZINJECT_DEVICE_FAULT;
 488  490          }
 489  491  
 490  492          switch (label_type) {
 491  493          case TYPE_LABEL_UBERBLOCK:
 492  494                  record->zi_start = offsetof(vdev_label_t, vl_uberblock[0]);
 493  495                  record->zi_end = record->zi_start + VDEV_UBERBLOCK_RING - 1;
 494  496                  break;
 495  497          case TYPE_LABEL_NVLIST:
 496  498                  record->zi_start = offsetof(vdev_label_t, vl_vdev_phys);
 497  499                  record->zi_end = record->zi_start + VDEV_PHYS_SIZE - 1;
 498  500                  break;
 499  501          case TYPE_LABEL_PAD1:
 500  502                  record->zi_start = offsetof(vdev_label_t, vl_pad1);
 501  503                  record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1;
 502  504                  break;
 503  505          case TYPE_LABEL_PAD2:
 504  506                  record->zi_start = offsetof(vdev_label_t, vl_pad2);
 505  507                  record->zi_end = record->zi_start + VDEV_PAD_SIZE - 1;
 506  508                  break;
 507  509          }
 508  510          return (0);
 509  511  }
  
    | 
      ↓ open down ↓ | 
    35 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX